# ## Clear R-workspace
# rm(list=ls(all=TRUE))

## Close all graphic devices
graphics.off()
#####################
### Load packages ###
#####################
library(pacman)

pacman::p_load(extrafont,DT,dplyr,tibble,ggplot2, hrbrthemes, grid, gridExtra, cowplot,Cairo,
               tidyverse,ggpubr, ggrepel,cowplot,
               RColorBrewer, pander,gridExtra, survival, survminer, caret, precrec, 
               reconPlots,flextable, extrafont,readxl, reshape2,rstatix,ggbeeswarm,
               cowplot,tidyquant,zoo,plyr,xlsx,corrr,ggnet,ggnetwork,
               tidygraph,ggraph,intergraph,clusterProfiler,AnnotationDbi,org.Hs.eg.db,rrvgo,data.table,GO.db,multienrichjam, DOSE, openxlsx,GOfuncR, officer)

extrafont::loadfonts(device="win")
windowsFonts(sans="Palatino Linotype")
loadfonts(device="win")
loadfonts(device="postscript")
###########################
### Main analysis paths ###
###########################
# Main
scriptsPath <- paste0("scripts/")
scriptsFunctionsPath <- paste0(scriptsPath,"functions/")
projectDataPath <- paste0("data/")
# Input
tcgaInputData <- paste0(projectDataPath,"TCGA/")
antiPDL1publicInputData <- paste0(projectDataPath,"antiPD(L)1_public/")
otherInputData <- paste0(projectDataPath,"other/")
referenceInputData <- paste0(projectDataPath,"reference/")
# Output
projectOutDataPath <- paste0("output/data_files/")
tcgaIntermediateData <- paste0(projectOutDataPath,"TCGA/")
antiPDL1publicIntermediateData <- paste0(projectOutDataPath,"antiPD(L)1_public/")
# Output for differential expression analysis and gene ontology (similarity)
DEA_GOoutData <- paste0(projectOutDataPath,"DEA_GO/")
# Output for survival analysis 
survivalOutData <- paste0(projectOutDataPath,"survival/")

figuresPath <- paste0("output/figures/")
tablesPath <- paste0("output/tables/")
modelsPath <- paste0("output/models/")
# Session/dependencies
sessionInfoPath <- paste0("session_info/")

# Other intermediate output
testDataOut <-paste0(modelsPath,"test_data/")
rocCompareOut <-paste0(modelsPath,"roc_compare/")
modelsInterm <- paste0(modelsPath,"models_interm_files/")

##################################
### LOAD SOURCE FUNCTIONS FILE ###
##################################
source(paste0(scriptsFunctionsPath,"paper_figures_tables_files_functions.R"))


#####################
### File suffixes ###
#####################
Rdata.suffix <- ".RData"

# Other script variables
modelTypeA <- "_TCRArichnessIGH.ds"
modelTypeB <- "_noLIU"
modelTypeC <- "_mintcr4bcr10"

## Other script parameters ##
runFiltSim <- TRUE

1 Data loading

1.1 Reference data

geneid2symb <- read.table(file.path(paste0(referenceInputData, "genID2name.csv")), header = TRUE, sep = ",")
geneid2symb$gene_id <- as.character(geneid2symb$gene_id)
geneid2symb$gene_name <- as.character(geneid2symb$gene_name)

1.2 TCGA DATA

tcgaData.tp <- loadRData(paste0(tcgaIntermediateData, "tcga.tp.Filt.final",Rdata.suffix))
# tcga.drug.notICI <-loadRData(paste0(tcgaIntermediateData, "tcga.tp.nonFilt.final_ICI4Tissues.RData")) # NOT USED
tcga.drug.ici.To.Exclude2 <- loadRData(paste0(tcgaIntermediateData, "tcga_patientsNOTici_toEXCLUDE_4Tissues.RData"))
# ###################
# # Compare with init TCGA
# tcgaData.tp.init <- loadRData(paste0("C:/Users/aimilia/BIOINF/1_DATA/3_TCGA/ClinicalDataTables/RData/", "tcga.tp.Filt.final",Rdata.suffix))
# 
# median(tcgaData.tp.init$TIS_sigscore, na.rm = T)
# median(tcgaData.tp$TIS_sigscore, na.rm = T)
# 
# median(tcgaData.tp.init$BCR_Richness, na.rm = T)
# median(tcgaData.tp$BCR_Richness, na.rm = T)
##################


table(tcgaData.tp$project)
## 
##           ACC          BLCA          BRCA     BRCA_Her2 BRCA_LuminalA 
##            92           412          1099            82           568 
## BRCA_LuminalB   BRCA_Normal       BRCA_TN          CESC          CHOL 
##           217           140           191           308            45 
##          COAD      COAD_MSI  COAD_MSI_low      COAD_MSS          DLBC 
##           461            78            80           275            50 
##          ESCA           GBM          HNSC          KICH          KIRC 
##           185           611           528           113           537 
##          KIRP          LAML           LGG          LIHC          LUAD 
##           291           200           516           377           583 
##          LUSC          MESO            OV          PAAD          PCPG 
##           504            87           606           185           179 
##          PRAD          READ          SARC          SKCM          STAD 
##           500           171           261           470           443 
##          TGCT          THCA          THYM          UCEC           UCS 
##           150           507           124           560            57 
##           UVM 
##            80
# table(tcga.drug.notICI$project) # NOT USED

# See which patients we need to remove
tcgaData.tp %>% dplyr::filter(bcr_patient_barcode %in% tcga.drug.ici.To.Exclude2) %>% droplevels() %>% distinct() %>% dplyr::select(bcr_patient_barcode, project) %>% distinct()
##     bcr_patient_barcode project
##  1:        TCGA-DK-AA77    BLCA
##  2:        TCGA-ER-A2NF    SKCM
##  3:        TCGA-GN-A8LN    SKCM
##  4:        TCGA-D9-A148    SKCM
##  5:        TCGA-DA-A1I1    SKCM
##  6:        TCGA-DA-A3F2    SKCM
##  7:        TCGA-DA-A3F5    SKCM
##  8:        TCGA-EE-A2GS    SKCM
##  9:        TCGA-EE-A3JI    SKCM
## 10:        TCGA-ER-A2NG    SKCM
## 11:        TCGA-FR-A3YN    SKCM
## 12:        TCGA-FR-A3YO    SKCM
## 13:        TCGA-FR-A69P    SKCM
## 14:        TCGA-FR-A7U9    SKCM
## 15:        TCGA-FR-A8YD    SKCM
## 16:        TCGA-GN-A4U4    SKCM
## 17:        TCGA-GN-A4U9    SKCM
## 18:        TCGA-GN-A8LK    SKCM
## 19:        TCGA-QB-AA9O    SKCM
## 20:        TCGA-WE-A8K5    SKCM
## 21:        TCGA-WE-AAA0    SKCM
## 22:        TCGA-WE-AAA4    SKCM
##     bcr_patient_barcode project
# Remove patients that received aPD1 immunotherapy.
tcgaData.tp.clean <- tcgaData.tp %>% dplyr::filter(bcr_patient_barcode %notin% tcga.drug.ici.To.Exclude2) %>% 
                                      droplevels() %>% 
                                      distinct()
dim(tcgaData.tp)
## [1] 12923   123
dim(tcgaData.tp.clean)
## [1] 12901   123
featuresSelected <- c("bcr_patient_barcode", "project","PD_L1","logTMB","BCR_Richness","TCR_Richness",
                      "B_cells.CIBER", "T_Cells_CD8.CIBER", "CD4cells.CIBER","TIS_sigscore", "TumorPurity","OS","OS.time","PFI","PFI.time","age_at_initial_pathologic_diagnosis","gender")

extraFeatures <- c("StromalScore", "ImmuneScore")

featuresIn <- c("TCR_Richness","BCR_Richness","logTMB","PD_L1","TIS_sigscore",
                      "CD8cells", "CD4cells","B_cells",  "TumorPurity")
featuresInLabels <- c("TCR Richness","BCR Richness","logTMB","PD-L1 expression","T-cell inflamed GEP", "CD8+ T-cells infiltration","CD4+ T-cells infiltration","B cells infiltration",
            "Tumor Purity")
## FOR TP data
tcgaData.tp.sub <- tcgaData.tp.clean %>% dplyr::select(all_of(featuresSelected)) %>% droplevels() %>% distinct()
tcgaData.tp.sub.extra <- tcgaData.tp.clean %>% dplyr::select(all_of(c(featuresSelected,extraFeatures))) %>% droplevels() %>% distinct()

colnames(tcgaData.tp.sub)[7:9] <- gsub(".CIBER","",colnames(tcgaData.tp.sub)[7:9])
colnames(tcgaData.tp.sub)[8] <- "CD8cells"

# Keep rows where at least on of the features of interested is not NA
tcgaData.tp.sub <-tcgaData.tp.sub[rowSums(is.na(tcgaData.tp.sub[,3:11])) != 9,]
tcgaData.tp.sub.extra <-tcgaData.tp.sub.extra[rowSums(is.na(tcgaData.tp.sub.extra[,3:11])) != 9,]
dim(tcgaData.tp.sub)
## [1] 12385    17
dim(tcgaData.tp.sub.extra)
## [1] 12385    19
mycancertypes <- levels(as.factor(tcgaData.tp.sub$project))
## Exclude THYM and DLBC
mycancertypes.reduced <- mycancertypes[-which(mycancertypes %in% c("THYM", "DLBC","COAD_MSI_low","BRCA_Her2","BRCA_LuminalA", "BRCA_LuminalB","BRCA","BRCA_Normal","COAD","UCEC","LAML"))]

## TCGA histologies aligned with aPD1 data
mycancertypes.final <- c("SKCM", "KIRC","BLCA","STAD")
## Subset data to reduced types
tcgaData.tp.sub.filt <- tcgaData.tp.sub %>% dplyr::filter(project %in% mycancertypes.reduced) %>% droplevels()
tcgaData.tp.sub.extra.filt<- tcgaData.tp.sub.extra %>% dplyr::filter(project %in% mycancertypes.reduced) %>% droplevels()

# Make project a factor with levels
tcgaData.tp.sub.filt$project <- factor(tcgaData.tp.sub.filt$project, levels=mycancertypes.reduced)
tcgaData.tp.sub.extra.filt$project <- factor(tcgaData.tp.sub.extra.filt$project, levels=mycancertypes.reduced)

#CHECK DUPLICATED PATIENTS
which(duplicated(tcgaData.tp.sub.filt$bcr_patient_barcode))
## integer(0)
tcgaData.tp.sub.filt[tcgaData.tp.sub.filt$bcr_patient_barcode==tcgaData.tp.sub.filt$bcr_patient_barcode[125],]
##    bcr_patient_barcode project    PD_L1   logTMB BCR_Richness TCR_Richness
## 1:        TCGA-BT-A2LD    BLCA 10.89251 1.282085            2            0
##      B_cells  CD8cells    CD4cells TIS_sigscore TumorPurity OS OS.time PFI
## 1: 0.1571038 0.1523094 0.006754054   -0.7454874   0.9118676  1     623   1
##    PFI.time age_at_initial_pathologic_diagnosis gender
## 1:      555                                  78 FEMALE
tcgaData.tp.sub.filt$OS.time <-tcgaData.tp.sub.filt$OS.time/(365.24/12)
## ADD OBJECTIVE RESPONSE DATA
## ORR data
orrDf <- read_excel(paste0(otherInputData,"ORR_TMB_table.xlsx"),sheet = 2)
orrDf <- as.data.frame(orrDf[,c(1:4,9:10)])
colnames(orrDf) <- c("TCGA_ca", "PD1L1_responders","PD1L1_totalTreated", "ORR", "project","Additional_info")
# Select only lines with complete info in first 5 columns
orrDf_final <- orrDf[complete.cases(orrDf[,1:5]),]
# Change name for COAD MSI &MSS
orrDf_final$project[7:8] <- c("COAD_MSI", "COAD_MSS")

# UPDATED ONE
orrDf2 <- read_excel(paste0(otherInputData,"ORR new TABLE.xlsx"),sheet = 2)
orrDf2 <- as.data.frame(orrDf2[,c(1:4,9:10)])
colnames(orrDf2) <- c("TCGA_ca", "PD1L1_responders","PD1L1_totalTreated", "ORR", "project","Additional_info")
# Select only lines with complete info in first 5 columns
orrDf_final2 <- orrDf2[complete.cases(orrDf2[,1:5]),]
orrDf_final2$project <-str_replace(orrDf_final2$project," ","_")
## For common cancers, keep the updated ones and add the missing ones from old one.
## Common cancers
intersect(orrDf_final$project,orrDf_final2$project)
##  [1] "ACC"      "BLCA"     "BRCA_TN"  "CESC"     "CHOL"     "COAD_MSI"
##  [7] "COAD_MSS" "UCEC"     "ESCA"     "TGCT"     "GBM"      "HNSC"    
## [13] "LIHC"     "SKCM"     "MESO"     "LUAD"     "LUSC"     "UVM"     
## [19] "OV"       "PAAD"     "PRAD"     "KIRC"     "SARC"     "STAD"    
## [25] "KIRP"     "KICH"
## Missed cancers in updated version
setdiff(orrDf_final$project,orrDf_final2$project)
## [1] "BRCA" "DLBC" "THYM" "MISC"
orrDf_final.missed <- orrDf_final %>% dplyr::filter(project %in% setdiff(orrDf_final$project,orrDf_final2$project)) %>% droplevels()
# Merge with updated one
orrDf <- rbind(orrDf_final2,orrDf_final.missed) %>% distinct() %>% droplevels()
# Change name for COAD MSI &MSS
# orrDf_final2$project[7:8] <- c("COAD_MSI", "COAD_MSS")
tcgaData.tp.sub.filt.orr <- merge(tcgaData.tp.sub.filt, orrDf %>% dplyr::select(ORR,project) %>% distinct() %>% droplevels(),
                                  by="project", all=TRUE)
unique(tcgaData.tp.sub.filt.orr$project)
##  [1] "ACC"      "BLCA"     "BRCA"     "BRCA_TN"  "CESC"     "CHOL"    
##  [7] "COAD_MSI" "COAD_MSS" "DLBC"     "ESCA"     "GBM"      "HNSC"    
## [13] "KICH"     "KIRC"     "KIRP"     "LGG"      "LIHC"     "LUAD"    
## [19] "LUSC"     "MESO"     "MISC"     "OV"       "PAAD"     "PCPG"    
## [25] "PRAD"     "READ"     "SARC"     "SKCM"     "STAD"     "TGCT"    
## [31] "THCA"     "THYM"     "UCEC"     "UCS"      "UVM"
tcgaData.tp.sub.filt.orr <- merge(tcgaData.tp.sub.filt, orrDf %>% dplyr::select(ORR,project) %>% distinct() %>% droplevels(),
                                  by="project", all.x=TRUE)
unique(tcgaData.tp.sub.filt.orr$project)
##  [1] "ACC"      "BLCA"     "BRCA_TN"  "CESC"     "CHOL"     "COAD_MSI"
##  [7] "COAD_MSS" "ESCA"     "GBM"      "HNSC"     "KICH"     "KIRC"    
## [13] "KIRP"     "LGG"      "LIHC"     "LUAD"     "LUSC"     "MESO"    
## [19] "OV"       "PAAD"     "PCPG"     "PRAD"     "READ"     "SARC"    
## [25] "SKCM"     "STAD"     "TGCT"     "THCA"     "UCS"      "UVM"
length(unique(tcgaData.tp.sub.filt.orr$project))
## [1] 30
unique(orrDf$project)
##  [1] "ACC"      "BLCA"     "BRCA_TN"  "CESC"     "CHOL"     "COAD_MSI"
##  [7] "COAD_MSS" "ESCA"     "GBM"      "HNSC"     "KICH"     "KIRC"    
## [13] "KIRP"     "LIHC"     "LUAD"     "LUSC"     "MESO"     "OV"      
## [19] "PRAD"     "PAAD"     "SARC"     "SKCM"     "STAD"     "TGCT"    
## [25] "UCEC"     "UVM"      "BRCA"     "DLBC"     "THYM"     "MISC"

1.3 aPD1 data

immdata.TcrBcr.full.red <- loadRData(paste0(antiPDL1publicIntermediateData,"immdata.TcrBcr.full.red",modelTypeA,modelTypeB,modelTypeC,".RData"))
immdata.TcrBcr.full.red$response <- ifelse(immdata.TcrBcr.full.red$response=="CR+PR","CRPR",immdata.TcrBcr.full.red$response)
## FILTER DATA FOR PD, CR, PR (.sub)
immdata.TcrBcr.full.red.sub <- immdata.TcrBcr.full.red %>% dplyr::filter(response %in% c("PD","CRPR")) %>% droplevels()
## EXTRACT RESPONSE, NOMINAL, MEASURE VARIABLES FROM DATA
response.var <- "response"
nominal.vars <- c("dataset","tissue","gender")

measure.vars <-colnames(immdata.TcrBcr.full.red)[c(2:10,12,32:43)]
measure.vars <- measure.vars[c(6,22,1:3,20,21,4:5,10,7:9,11:19)]


## CHECK INFINITE LOG TMB VALUES
immdata.TcrBcr.full.red.sub[which(is.infinite(immdata.TcrBcr.full.red.sub$logTMB)),]
##  [1] run_accession      TuTACK_sigscore    TIS_sigscore       PDL1              
##  [5] CD8A               CD8B               TCRArichness.ds    StromalScore      
##  [9] ImmuneScore        ESTIMATEScore      title              age               
## [13] gender             disease_status     biopsy_site        primary_tumor     
## [17] treatment          prior_treatment    response           OS.time           
## [21] OS                 PFS.time           PFS                pre_on_treatment  
## [25] total_muts         nonsyn_muts        clonal_muts        subclonal_muts    
## [29] TMB                tissue             dataset            Bcells            
## [33] CD8cells           CD4cells           Tregs              NKcells           
## [37] DCcells            Monocytes          Granulocytes       Macrophages       
## [41] TumorPurity        logTMB             BCR.IGHrichness.ds
## <0 rows> (or 0-length row.names)
## Extract tissues in data so far
tissues <- unique(immdata.TcrBcr.full.red.sub$tissue)


# Which are the covariates I am considering
# covariatesIn <- measure.vars[c(1:2,4:7,14:16)]
covariatesIn <- measure.vars[c(1:2,4:7,14:16)]
# covariatesIn <- measure.vars[c(1:2,4:8,15:17)]

responseVar <- "response"

colnames(immdata.TcrBcr.full.red)[c(7,43,32,4)] <- c("TCR_Richness","BCR_Richness","B_cells","PD_L1")


immdata.TcrBcr.full.red$tissue <- ifelse(immdata.TcrBcr.full.red$tissue=="melanoma", "SKCM anti-PD-1/L1",
                                         ifelse(immdata.TcrBcr.full.red$tissue=="RCC","KIRC anti-PD-1/L1",
                                                ifelse(immdata.TcrBcr.full.red$tissue=="bladder","BLCA anti-PD-1/L1",                                             ifelse(immdata.TcrBcr.full.red$tissue=="gastric","STAD anti-PD-1/L1",immdata.TcrBcr.full.red$tissue))))

immdata.TcrBcr.full.red$tissue <- factor(immdata.TcrBcr.full.red$tissue, levels=paste0(mycancertypes.final, " anti-PD-1/L1"))

1.4 Set parameters and labels for analysis

parameters <- colnames(tcgaData.tp.sub.filt)[c(3:5,7:11)]
labels <- c("PD-L1 expression","logTMB","BCR Richness","B cells infiltration","CD8+ T-cells infilitration","CD4+ T-cells infilitration",
            "T-cell inflamed GEP", "Tumor Purity")
tcgaData.tp.sub.filt.orr %>% dplyr::select(project,bcr_patient_barcode,ORR) %>% dplyr::group_by(project,ORR) %>% distinct() %>% dplyr::summarize(n())
## # A tibble: 30 x 3
## # Groups:   project [30]
##    project     ORR `n()`
##    <chr>     <dbl> <int>
##  1 ACC      0.06      92
##  2 BLCA     0.181    411
##  3 BRCA_TN  0.0669   191
##  4 CESC     0.145    305
##  5 CHOL     0.0577    36
##  6 COAD_MSI 0.311     78
##  7 COAD_MSS 0        275
##  8 ESCA     0.172    185
##  9 GBM      0.0874   397
## 10 HNSC     0.146    528
## # ... with 20 more rows
## # i Use `print(n = ...)` to see more rows
tcga.cancers <- tcgaData.tp.sub.filt.orr %>% dplyr::pull(project) %>% unique()
orr_selected_cancers <- tcga.cancers[tcga.cancers %in% orrDf$project] # TOTAL 26 TCGA 

2 Correlations

Here I calculate:

  1. Global correlations for the 9 features
  2. Multicancer (4 cohorts) correlations for the 9 features
  3. Also calculate the significance of those correlations
  4. Correlations of TCR vs CD8/CD4 across TCGA with rolling avg
  5. Correlations of BCR vs Bcells/CD8/GEP across TCGA with rolling avg
  6. Objective response correlations

I want to have a network diagram, preferably a circle one, with stable positions, so not using distances, or creating clusters of highly correlated features. This diagram ideally should present the strength/size of the correlation by thickness of connecting line, whether it is positive or negative, by color of line and ideally should only present significant correlations, meaning that for non-significant, there will be no line. Idea is to provide a table with the correlations and the significance as supplementary table, so people can see exact values. But the graph should support visually the patterns, especially the transition from global to multicancer. Then as supplementary we should have the same network graphs for each cancer type separately, again to see if patterns are similar. It’s way easier with the graphs

Calculate correlation matrices between features

## TCGA GLOBAL
tidy_cors.global <- tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.reduced) %>% 
  dplyr::select(all_of(featuresIn)) %>% 
  setNames(featuresInLabels) %>%
  correlate(use = "pairwise.complete.obs", # This is very important
  method = "spearman", diagonal=1) #%>% 
  #stretch()

# Check significance of correlations
cormat.gb <- tidy_cors.global %>% column_to_rownames(var="term") %>% as.matrix()
# head(cormat)
upper_tri <- get_upper_tri(cormat.gb)
melted_cormat.gb <- reshape2::melt(cormat.gb, na.rm = TRUE)
# head(melted_cormat)

formCors.gb <- formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.reduced) %>% 
  dplyr::select(all_of(featuresIn)) %>% setNames(featuresInLabels), methodSel = NULL)
formCors.gb$measure2 <- gsub('\\.\\.',"+ ",formCors.gb$measure2 )
formCors.gb$measure2 <- gsub('\\.'," ",formCors.gb$measure2 )
formCors.gb$measure2 <- ifelse(formCors.gb$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.gb$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.gb$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.gb$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.gb$measure2))))
formCors.gb$measure1 <- factor(formCors.gb$measure1, levels = levels(melted_cormat.gb$Var1))
formCors.gb$measure2 <- factor(formCors.gb$measure2, levels = levels(melted_cormat.gb$Var2))



## TCGA MULTICOHORT (4)
tidy_cors.multi <- tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.final) %>% 
  dplyr::select(all_of(featuresIn))%>% 
  setNames(featuresInLabels) %>%
  correlate(use = "pairwise.complete.obs", # This is very important
  method = "spearman") #%>% 
  #stretch()
cormat.ml <- tidy_cors.multi %>% column_to_rownames(var="term") %>% as.matrix()
# head(cormat)
upper_tri <- get_upper_tri(cormat.ml)
melted_cormat.ml <- reshape2::melt(cormat.ml, na.rm = TRUE)
# head(melted_cormat)

formCors.ml <- formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.final) %>% 
  dplyr::select(all_of(featuresIn))%>% 
  setNames(featuresInLabels) , methodSel = NULL)
formCors.ml$measure2 <- gsub('\\.\\.',"+ ",formCors.ml$measure2 )
formCors.ml$measure2 <- gsub('\\.'," ",formCors.ml$measure2 )
formCors.ml$measure2 <- ifelse(formCors.ml$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.ml$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.ml$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.ml$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml$measure2))))
formCors.ml$measure1 <- factor(formCors.ml$measure1, levels = levels(melted_cormat.ml$Var1))
formCors.ml$measure2 <- factor(formCors.ml$measure2, levels = levels(melted_cormat.ml$Var2))


### TCGA INDIVID MULTICANCER

tidy_cors.multi.sep <- sapply(mycancertypes.final, function(x) tcgaData.tp.sub.filt %>% dplyr::filter(project ==x) %>% 
  dplyr::select(all_of(featuresIn))%>% 
  setNames(featuresInLabels) %>%
  correlate(use = "pairwise.complete.obs", # This is very important
  method = "spearman"),simplify = FALSE)


cormat.ml.sep <- sapply(tidy_cors.multi.sep, function(x) x %>% column_to_rownames(var="term") %>% as.matrix(), simplify = FALSE)
# head(cormat)
upper_tri.ml.sep <- sapply(cormat.ml.sep, function(x) get_upper_tri(x), simplify = FALSE)

melted_cormat.ml.sep <- sapply(cormat.ml.sep, function(x) reshape2::melt(x, na.rm = TRUE), simplify = FALSE)

formCors.ml.sep <- sapply(mycancertypes.final, function(x) formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project ==x) %>% 
  dplyr::select(all_of(featuresIn))%>% 
  setNames(featuresInLabels) , methodSel = NULL),simplify = FALSE)

formCors.ml.sep$SKCM$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$SKCM$measure2)
formCors.ml.sep$SKCM$measure2 <- gsub('\\.'," ",formCors.ml.sep$SKCM$measure2 )
formCors.ml.sep$SKCM$measure2 <- ifelse(formCors.ml.sep$SKCM$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.ml.sep$SKCM$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.ml.sep$SKCM$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.ml.sep$SKCM$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$SKCM$measure2))))
formCors.ml.sep$SKCM$measure1 <- factor(formCors.ml.sep$SKCM$measure1, levels = levels(melted_cormat.ml.sep$SKCM$Var1))
formCors.ml.sep$SKCM$measure2 <- factor(formCors.ml.sep$SKCM$measure2, levels = levels(melted_cormat.ml.sep$SKCM$Var2))
## STAD
formCors.ml.sep$STAD$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$STAD$measure2)
formCors.ml.sep$STAD$measure2 <- gsub('\\.'," ",formCors.ml.sep$STAD$measure2 )
formCors.ml.sep$STAD$measure2 <- ifelse(formCors.ml.sep$STAD$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.ml.sep$STAD$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.ml.sep$STAD$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.ml.sep$STAD$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$STAD$measure2))))
formCors.ml.sep$STAD$measure1 <- factor(formCors.ml.sep$STAD$measure1, levels = levels(melted_cormat.ml.sep$STAD$Var1))
formCors.ml.sep$STAD$measure2 <- factor(formCors.ml.sep$STAD$measure2, levels = levels(melted_cormat.ml.sep$STAD$Var2))
##BLCA
formCors.ml.sep$BLCA$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$BLCA$measure2)
formCors.ml.sep$BLCA$measure2 <- gsub('\\.'," ",formCors.ml.sep$BLCA$measure2 )
formCors.ml.sep$BLCA$measure2 <- ifelse(formCors.ml.sep$BLCA$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.ml.sep$BLCA$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.ml.sep$BLCA$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.ml.sep$BLCA$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$BLCA$measure2))))
formCors.ml.sep$BLCA$measure1 <- factor(formCors.ml.sep$BLCA$measure1, levels = levels(melted_cormat.ml.sep$BLCA$Var1))
formCors.ml.sep$BLCA$measure2 <- factor(formCors.ml.sep$BLCA$measure2, levels = levels(melted_cormat.ml.sep$BLCA$Var2))
##KIRC
formCors.ml.sep$KIRC$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$KIRC$measure2)
formCors.ml.sep$KIRC$measure2 <- gsub('\\.'," ",formCors.ml.sep$KIRC$measure2 )
formCors.ml.sep$KIRC$measure2 <- ifelse(formCors.ml.sep$KIRC$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.ml.sep$KIRC$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.ml.sep$KIRC$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.ml.sep$KIRC$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$KIRC$measure2))))
formCors.ml.sep$KIRC$measure1 <- factor(formCors.ml.sep$KIRC$measure1, levels = levels(melted_cormat.ml.sep$KIRC$Var1))
formCors.ml.sep$KIRC$measure2 <- factor(formCors.ml.sep$KIRC$measure2, levels = levels(melted_cormat.ml.sep$KIRC$Var2))

2.1 Networks of correlations with clusters

global.net<-network_plot(tidy_cors.global,min_cor = .3, repel= TRUE, curved = TRUE,colours = c("navyblue", "white", "indianred2"),) + labs(color="Spearman's Correlation") 

global.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
global.net$layers[[3]]$aes_params$size <- 8

multi.net<-network_plot(tidy_cors.multi,min_cor = .3, repel= TRUE, curved = TRUE,colours = c("navyblue", "white", "indianred2"),) + labs(color="Spearman's Correlation") 

multi.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net$layers[[3]]$aes_params$size <- 8



full_netClusters <- ggarrange(plotlist = list(global.net, multi.net), common.legend = TRUE, legend="bottom",labels = c("A. Pancancer", "B. Multicancer"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"))

full_netClusters

multi.net.sep<-sapply(tidy_cors.multi.sep, function(x) network_plot(x,min_cor = 0.3, repel= TRUE, curved = TRUE,colours = c("navyblue", "white", "indianred2"),) + labs(color="Spearman's Correlation"), simplify = FALSE) 
multi.net.sep
## $SKCM
## 
## $KIRC
## 
## $BLCA
## 
## $STAD

2.1.1 Circle Networks with gradient color

# Select only significant p-value correlations
formCors.ml.sign <- formCors.ml %>% dplyr::filter(sig_p==TRUE) %>% droplevels()
formCors.gb.sign <- formCors.gb %>% dplyr::filter(sig_p==TRUE) %>% droplevels()

cor.graph.ml <- as_tbl_graph(formCors.ml.sign, directed = FALSE) %>% activate(edges) %>%
  dplyr::rename(weight = r)
# Select only significant p-value correlations
formCors.ml.sep.sign <- sapply(formCors.ml.sep, function(x) x %>% dplyr::filter(sig_p==TRUE) %>% droplevels(), simplify = FALSE)

cor.graph.ml.sep <- sapply(formCors.ml.sep.sign, function(x) as_tbl_graph(x, directed = FALSE) %>% activate(edges) %>%
  dplyr::rename(weight = r), simplify = FALSE)

2.2 Objective response rate

2.2.1 TCR & BCR

tcgaData.tp.sub.filt.orr %>% dplyr::select(project,bcr_patient_barcode,ORR) %>% dplyr::group_by(project,ORR) %>% distinct() %>% dplyr::summarize(n())
## # A tibble: 30 x 3
## # Groups:   project [30]
##    project     ORR `n()`
##    <chr>     <dbl> <int>
##  1 ACC      0.06      92
##  2 BLCA     0.181    411
##  3 BRCA_TN  0.0669   191
##  4 CESC     0.145    305
##  5 CHOL     0.0577    36
##  6 COAD_MSI 0.311     78
##  7 COAD_MSS 0        275
##  8 ESCA     0.172    185
##  9 GBM      0.0874   397
## 10 HNSC     0.146    528
## # ... with 20 more rows
## # i Use `print(n = ...)` to see more rows
tcga.cancers <- tcgaData.tp.sub.filt.orr %>% pull(project) %>% unique()
orr_selected_cancers <- tcga.cancers[tcga.cancers %in% orrDf$project] # TOTAL 26 TCGA 
orr_tcr <- orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,"TCR_Richness", aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, "TCR Richness", 20, 0.08,0.015, "pearson")
# orr_tcr
orr_bcr <- orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,"BCR_Richness", aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, "BCR Richness", 24, 0.08,0.015, "pearson")

leg <- get_legend(orr_bcr +theme(legend.position="bottom", legend.box = "horizontal"))
pOrr <-ggdraw() +
  draw_plot(orr_tcr +rremove("legend"),x = 0, y = 0.06, width = .5, height = .93) +
  draw_plot(orr_bcr +rremove("legend"),x = 0.5, y = 0.06, width = .5, height = .93) +
  draw_plot(leg,x = 0.45, y = 0, width = .1, height = .05)+
  draw_plot_label(label = c("A", "B"), size = 15,
                  x = c(0, 0.5), y = c(1, 1),family="Palatino Linotype")

pOrr

2.2.2 Other parameters

others <- c("logTMB","PD_L1", "TIS_sigscore","CD8cells", "CD4cells", "B_cells", "TumorPurity")
othersLabels <- featuresInLabels[3:9]
othersxPlus <-c(.7,.7, .3, .03, .03,  0.035,.07)
othersyPlus <-c(0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08)
othersyMinus <-c(0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015)

othersParam <- list(others, othersLabels, othersxPlus, othersyPlus, othersyMinus)
orr_others <- sapply(1:7, function(x) orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,othersParam[[1]][x], aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, othersParam[[2]][x],othersParam[[3]][x], othersParam[[4]][x], othersParam[[5]][x], "pearson"), simplify = FALSE)


pOrr.other <- ggarrange(plotlist = orr_others, legend = "bottom", common.legend = TRUE, ncol=3, nrow=3,labels = c(paste0(paste0(LETTERS[1:7],""))),font.label = list(size = 16, color = "black", face = "bold", family = "Palatino Linotype"))
pOrr.other

2.2.3 All parameters together

others <- featuresIn
othersLabels <- featuresInLabels

othersxPlus <-c(20,24,.7,.7, .3, .03, .03,  0.035,.07)
othersyPlus <-c(0.08,0.08,0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08)
othersyMinus <-c(0.015,0.015,0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015)


othersParam <- list(others, othersLabels, othersxPlus, othersyPlus, othersyMinus)
orr_others <- sapply(1:9, function(x) orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,othersParam[[1]][x], aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, othersParam[[2]][x],othersParam[[3]][x], othersParam[[4]][x], othersParam[[5]][x], "pearson"), simplify = FALSE)
# orr_others


pOrr.all <- ggarrange(plotlist = orr_others, legend = "bottom", common.legend = TRUE, ncol=3, nrow=3,labels = c(paste0(paste0(LETTERS[1:9],". "),othersLabels)),font.label = list(size = 12, color = "black", face = "bold", family = "Palatino Linotype"), align="v",
                      hjust=c(-0.34,-0.34,-0.5,-0.26,-0.24,-0.2,-0.2,-0.27,-0.345))
pOrr.all

pOrr.all2 <- ggarrange(plotlist = orr_others, legend = "bottom", common.legend = TRUE, ncol=3, nrow=3,labels = "AUTO",font.label = list(size = 12, color = "black", face = "bold", family = "Palatino Linotype"))
pOrr.all2

2.3 Boxplot correlations CD8 vs TCR | B cells vs BCR

box_tcr_cd8 <- grid.draw(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-280,-245.0,rollSum=TRUE))
box_tcr_cd8

box_bcr_bcells <- TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cell infiltration","BCR Richness",-310.3,-265.0,rollSum=TRUE)
# box_bcr_bcells

box.tcr.bcr <- ggdraw() + 
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-910,-750.0,rollSum=TRUE),x = 0, y = 0.5, width = 1, height = 0.5) +
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cell infiltration","BCR Richness",-820,-680.0,rollSum=TRUE),x = 0, y = 0.001, width = 1, height = 0.5) +
  draw_plot_label(label = c("A", "B"), size = 15,
                  x = c(0, 0), y = c(1, 0.5),family="Palatino Linotype")

2.3.1 TCR vs CD8/GEP | BCR vs B cells

box.tcr.bcr <- ggdraw() + 
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.66, width = 1, height = 0.33) +
   draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","TCR_Richness",median,"median","T cell inflamed GEP","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(40,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.33, width = 1, height = 0.33) +
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(320,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0, width = 1, height = 0.33)  +
  draw_plot_label(label = c("A", "B","C"), size = 15,
                  x = c(0, 0, 0), y = c(1, 0.66,0.33),family="Palatino Linotype")

box.tcr.bcr

2.3.2 BCR vs CD8/GEP

p.bcr <-ggdraw() + 
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","BCR_Richness",median,"median","CD8+ T cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.5, width = 1, height = 0.49) +
   draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","BCR_Richness",median,"median","T cell inflamed GEP","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0, width = 1, height = 0.49) +
  draw_plot_label(label = c("A", "B"), size = 15,
                  x = c(0, 0), y = c(1, 0.5),family="Palatino Linotype")
                                     
p.bcr



p.bcr_h <-ggdraw() + 
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","BCR_Richness",median,"median","CD8+ T cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),
            x = 0, y = 0, width = 0.49, height = 1) +
   draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","BCR_Richness",median,"median","T cell inflamed GEP","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),
             x = 0.5, y = 0, width = 0.49, height = 1) +
  draw_plot_label(label = c("A", "B"), size = 16,
                  x = c(0, 0.5), y = c(1, 1),family="Palatino Linotype")
                                     
p.bcr_h

Using complete data across BCR, TCR, CD8, GEP and Bcells:

p.bcr2 <- ggdraw() + 
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","BCR_Richness",median,"median","CD8+ T cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.5, width = 1, height = 0.49) +
   draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","BCR_Richness",median,"median","T cell inflamed GEP","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE,filterBY = c("BCR_Richness","TCR_Richness","CD8cells","TIS_sigscore","B_cells")),x = 0, y = 0, width = 1, height = 0.49) +
  draw_plot_label(label = c("A", "B"), size = 15,
                  x = c(0, 0), y = c(1, 0.5),family="Palatino Linotype")

p.bcr2

3 SUPPLEMENTAL FILE 1: Biomarkers pairwise Spearman’s correlations- pancancer, multicancer, individual cohorts

Correlation tables with significance results for:

  1. global
  2. multicancer
  3. separate histologies
# Individual correlations

formCors.sep <- sapply(mycancertypes.final, function(x) formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project ==x) %>% 
  dplyr::select(all_of(featuresIn)) %>% setNames(featuresInLabels), methodSel = NULL), simplify = FALSE)
formCors.sep <- ldply(formCors.sep)

formCors.sep$measure2 <- gsub('\\.\\.',"+ ",formCors.sep$measure2 )
formCors.sep$measure2 <- gsub('\\.'," ",formCors.sep$measure2 )
formCors.sep$measure2 <- ifelse(formCors.sep$measure2=="PD L1 expression","PD-L1 expression",
                            ifelse(formCors.sep$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
                                   ifelse(formCors.sep$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
                                          ifelse(formCors.sep$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.sep$measure2))))

# Spearman's rho rank correlation coefficients for all possible pairs of columns of a matrix
# Ranks are computed using efficient algorithms
formCors.gb %>% head()
## # A tibble: 6 x 8
##   measure1     measure2           r     n      P sig_p p_if_sig r_if_sig
##   <fct>        <fct>          <dbl> <dbl>  <dbl> <lgl>    <dbl>    <dbl>
## 1 TCR Richness TCR Richness 1        3914 NA     NA          NA   NA    
## 2 TCR Richness BCR Richness 0.590    3914  0     TRUE         0    0.590
## 3 BCR Richness BCR Richness 1        3914 NA     NA          NA   NA    
## 4 TCR Richness logTMB       0.00485  3914  0.761 FALSE       NA   NA    
## 5 BCR Richness logTMB       0.217    3914  0     TRUE         0    0.217
## 6 logTMB       logTMB       1        3914 NA     NA          NA   NA
formCors.ml %>% head()
## # A tibble: 6 x 8
##   measure1     measure2          r     n           P sig_p    p_if_sig r_if_sig
##   <fct>        <fct>         <dbl> <dbl>       <dbl> <lgl>       <dbl>    <dbl>
## 1 TCR Richness TCR Richness  1       835 NA          NA    NA            NA    
## 2 TCR Richness BCR Richness  0.668   835  0          TRUE   0             0.668
## 3 BCR Richness BCR Richness  1       835 NA          NA    NA            NA    
## 4 TCR Richness logTMB       -0.157   835  0.00000542 TRUE   0.00000542   -0.157
## 5 BCR Richness logTMB        0.102   835  0.00328    TRUE   0.00328       0.102
## 6 logTMB       logTMB        1       835 NA          NA    NA            NA
formCors.sep %>% head()
##    .id     measure1     measure2         r  n            P sig_p     p_if_sig
## 1 SKCM TCR Richness TCR Richness 1.0000000 56           NA    NA           NA
## 2 SKCM TCR Richness BCR Richness 0.6077879 56 6.773896e-07  TRUE 6.773896e-07
## 3 SKCM BCR Richness BCR Richness 1.0000000 56           NA    NA           NA
## 4 SKCM TCR Richness       logTMB 0.3617825 56 6.148641e-03  TRUE 6.148641e-03
## 5 SKCM BCR Richness       logTMB 0.2529044 56 6.002960e-02 FALSE           NA
## 6 SKCM       logTMB       logTMB 1.0000000 56           NA    NA           NA
##    r_if_sig
## 1        NA
## 2 0.6077879
## 3        NA
## 4 0.3617825
## 5        NA
## 6        NA
newColnames <- c("Biomarker 1", "Biomarker 2", "Spearman's rho correlation", "N (observations)", "P-value")

formCors.gb<- formCors.gb %>%
  dplyr::select(all_of(c("measure1","measure2","r","n","P"))) %>% 
  setNames(newColnames)


formCors.ml<- formCors.ml %>%
  dplyr::select(all_of(c("measure1","measure2","r","n","P"))) %>% 
  setNames(newColnames)


formCors.sep<- formCors.sep %>%
  dplyr::select(all_of(c(".id","measure1","measure2","r","n","P"))) %>% 
  setNames(c("TCGA cohort",newColnames))
#Pairs with fewer than 2 non-missing values have the r values set to NA
# Uses midranks in case of ties, as described by Hollander and Wolfe. P-values are approximated by using the t or F distributions.
##################
headerStyle1 <- createStyle(
  fontSize = 11, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = "bold"
)

TitleStyle1 <- createStyle(
  fontSize = 14, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = NULL
)


TitleStyle2 <- createStyle(
  fontSize = 12, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = "bold"
)


TitleStyle3 <- createStyle(
  fontSize = 11, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = NULL
)


wb <- createWorkbook()

# PANCANCER
addWorksheet(wb, "Pan-cancer")
writeData(wb, "Pan-cancer", "Supplementary table 1. Biomarkers pairwise Spearman's correlations", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 1, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Pan-cancer","PAN-CANCER", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 1, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Pan-cancer","Assymptotic P-values are shown, apptoximated by using the t or F distributions", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 1, rows = 4, cols = 1, style = TitleStyle3)
writeData(wb, "Pan-cancer","Pairs with fewer than 2 non-missing values have the Spearman's r values set to NA", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet = 1, rows = 5, cols = 1, style = TitleStyle3)
# Write table with data
writeData(wb, "Pan-cancer", formCors.gb %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

## MULTICANCER
addWorksheet(wb, "Multicancer")
writeData(wb, "Multicancer", "Supplementary table 1. Biomarkers pairwise Spearman's correlations", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 2, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Multicancer","MULTI-CANCER", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 2, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Multicancer","Assymptotic P-values are shown, apptoximated by using the t or F distributions", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 2, rows = 4, cols = 1, style = TitleStyle3)
writeData(wb, "Multicancer","Pairs with fewer than 2 non-missing values have the Spearman's r values set to NA", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet = 2, rows = 5, cols = 1, style = TitleStyle3)
# Write table with data
writeData(wb, "Multicancer", formCors.ml %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

## INDIVIDUAL
addWorksheet(wb, "Individual cohorts")
writeData(wb, "Individual cohorts", "Supplementary table 1. Biomarkers pairwise Spearman's correlations", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 3, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Individual cohorts","INDIVIDUAL TCGA COHORTS (multicancer)", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 3, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Individual cohorts","Assymptotic P-values are shown, apptoximated by using the t or F distributions", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 3, rows = 4, cols = 1, style = TitleStyle3)
writeData(wb, "Pan-cancer","Pairs with fewer than 2 non-missing values have the Spearman's r values set to NA", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet = 3, rows = 5, cols = 1, style = TitleStyle3)
# Write table with data
writeData(wb, "Individual cohorts", formCors.sep %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 1.xlsx"), overwrite = TRUE)

4 Gene ontology specific BPs

Extract GO BP annotation for specific processes and all their child processes

# Annotation of GO Identifiers to their Biological Process Offspring
lumphActiv <- c("GO:0002285",get("GO:0002285", GOBPOFFSPRING))
agPresent <- c("GO:0019882",get("GO:0019882", GOBPOFFSPRING))
igProd <- c("GO:0002377",get("GO:0002377", GOBPOFFSPRING))
ifng.a <- c("GO:0034341",get("GO:0034341", GOBPOFFSPRING))
ifng.b <- c("GO:0032609",get("GO:0032609", GOBPOFFSPRING))
AgSignaling <- c("GO:0050851", get("GO:0050851",GOBPOFFSPRING))#


bp_ImmuneRelated.focused <- list(#"T cell activation" = tCellActiv,
                                 "Lymphocyte activation involved in immune response" = lumphActiv,
                                 "Antigen processing and presentation" = agPresent,
                                 "Antigen receptor mediated signaling" = AgSignaling,
                                 "Immunoglobulin production" = igProd,
                                 "Response to type II interferon" = ifng.a,
                                 "Type II interferon production" = ifng.b#,
                                 # "B cell immunity"=bcellImmunity,
                                 # "T cell immunity"=tcellImmunity
                                 )

bp_ImmuneRelated.focused <- sapply(bp_ImmuneRelated.focused, function(x) as.data.frame(x), simplify=FALSE)

bp_ImmuneRelated.focused.DF <- ldply(bp_ImmuneRelated.focused)
colnames(bp_ImmuneRelated.focused.DF) <- c("Parent Description","ID")

5 GO BCR high vs BCR low samples

5.1 LFC = 0

Load GO ORA results

newCol <- "BCR"
suffix <- paste0("_",newCol)

Only upregulated

## MELANOMA
skcm.res = get_geneLists(DEA_GOoutData,paste0("SKCM.Degs.dataset_lfc0",suffix), lfcThres=0)
## RCC
kirc.res = get_geneLists(DEA_GOoutData,paste0("KIRC.Degs.dataset_lfc0",suffix), lfcThres=0)
## BLADDER
blca.res = get_geneLists(DEA_GOoutData,paste0("BLCA.Degs.dataset_lfc0",suffix), lfcThres=0)
## GASTRIC
stad.res = get_geneLists(DEA_GOoutData,paste0("STAD.Degs.dataset_lfc0",suffix), lfcThres=0)

## All together

go.all <- sapply(list("SKCM"=skcm.res,"KIRC"=kirc.res,"BLCA" = blca.res, "STAD" = stad.res), function(x) x[[3]], simplify = FALSE )
go.all.DF <- ldply(go.all)
go.all.DF.up <- go.all.DF %>% dplyr::filter(group=="upregulated")
# 
# skcm_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_SKCM",Rdata.suffix))
# blca_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_BLCA",Rdata.suffix))
# kirc_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_KIRC",Rdata.suffix))
# stad_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_STAD",Rdata.suffix))
# 
# 
# ## Separate histologies
# if(isTRUE(runClean)){
# 
#   bp <- sapply(go.all, function(x) enrichGO(x$gene, ont="BP", OrgDb = 'org.Hs.eg.db',keyType = "SYMBOL",pAdjustMethod = "BH", universe=skcm_genes,
#                             pvalueCutoff  = 0.05,
#                             qvalueCutoff = 0.05,minGSSize=5), simplify = FALSE)
#   save(bp,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
# }else{
#   bp_bcr_0<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
# }

5.1.1 Enrichment results

# Load GO enrichment results
bp_bcr_0<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
## Drop GO terms not included in our focused GO terms set,
bp_bcr_0.filt <- sapply(bp_bcr_0, function(x) dropGO(x, level = NULL, term = setdiff(x %>% as.data.frame() %>% pull(ID),bp_ImmuneRelated.focused.DF$ID)), simplify = FALSE)

# Then Similarity analysis on reduced terms
if(isTRUE(runFiltSim)){
  bp_bcr_0.filt.red <- sapply(bp_bcr_0.filt, function(x) simplify(x, cutoff=0.45, by="p.adjust", select_fun=min, measure="Wang"), simplify = FALSE)
  save(bp_bcr_0.filt.red,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
## REMEMBER: The higher the threshold, the more the redundant terms are removed
}else{
  bp_bcr_0.filt.red <- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))

}

5.1.2 Reduced filtered BPs

bp_bcr_0.filt.red.DFs <- sapply(bp_bcr_0.filt.red, function(x) x %>% as.data.frame() %>% 
    dplyr::select(-geneID) %>% 
    dplyr::mutate(category=bp_ImmuneRelated.focused.DF$`Parent Description`[match(ID,bp_ImmuneRelated.focused.DF$ID)]) %>%
    dplyr::mutate("GeneRatio"=parse_ratio(GeneRatio)), simplify=FALSE)


bp_bcr_0.filt.red.merged <- ldply(bp_bcr_0.filt.red.DFs)


bubble_bcr.cancers.filt.red <- sapply(bp_bcr_0.filt.red.DFs , function(x) GO_bubblePlot(DF=x,
              sortVar="GeneRatio",xVar="GeneRatio",yVar="Description",sizeVar="Count",fillVar="p.adjust", aggreg=FALSE,choosePlot="norm",sizeMin=2,sizeMax=182, xMax=ceiling_dec(max(bp_bcr_0.filt.red.merged$GeneRatio),2), seqN=10, fillMax=max(bp_bcr_0.filt.red.merged$p.adjust)), simplify = FALSE)
pBub_bcr_0.filt.red <-ggarrange(plotlist = bubble_bcr.cancers.filt.red, 
          nrow=1, ncol=4, labels = c("SKCM", "KIRC", "BLCA", "STAD"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"), common.legend = T, legend = "bottom")

pBub_bcr_0.filt.red

5.1.3 SUPPLEMENTAL FILE 3: GO enrichment & similarity

go_description <-get_names(bp_ImmuneRelated.focused.DF$ID)

bps_filtered_df <- bp_ImmuneRelated.focused.DF
bps_filtered_df$`BP Offspring Term description` <- go_description$go_name
colnames(bps_filtered_df)[1]<- "BP Term description"
#################

##################
headerStyle1 <- createStyle(
  fontSize = 11, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = "bold"
)

TitleStyle1 <- createStyle(
  fontSize = 14, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = NULL
)


TitleStyle2 <- createStyle(
  fontSize = 12, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = "bold"
)


TitleStyle3 <- createStyle(
  fontSize = 11, fontColour = "black",
  fgFill = NULL, halign = "left",textDecoration = NULL
)

var <- c("SKCM", "KIRC", "BLCA", "STAD")

wb <- createWorkbook()

addWorksheet(wb, "Focused BPs")
writeData(wb, "Focused BPs", "Supplementary table 3. Gene ontology biological processes (BP) focused set, including annotations of all GO annotation offsprings", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 1, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Focused BPs","GO Biological processes:", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 1, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Focused BPs",paste(paste0(seq(1:6),". ",unique(bps_filtered_df$`BP Term description`)), collapse=", "), startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 1, rows = 4, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, "Focused BPs", bps_filtered_df, startCol = 1, startRow = 6, rowNames = FALSE,headerStyle = headerStyle1)

condition="BCR-"
for(i in seq(1:4)){
  # i=1
  # Create sheet
  addWorksheet(wb, paste0(condition,var[i]))
  # Start writing title
  # First
  writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 3. Gene ontology biological processes associated with the differentially upregulated genes in BCR Richness high samples compared to BCR Richness low samples", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - GO BIOLOGICAL PROCESSES WITH A SIGNIFICANT ASSOCIATION TO THE UPREGULATED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff of enrichment test set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=paste0(condition,var[i]),  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=paste0(condition,var[i]), "Q-value cutoff of enrichment tests to report as significant set to 0.05. ", startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb, paste0(condition,var[i]), bp_bcr_0[[i]], startCol = 1, startRow = 7, rowNames = TRUE,headerStyle = headerStyle1)

}

saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"), overwrite = TRUE)

5.1.4 SUPPLEMENTAL FILE 2: DEA

columnsDEGs <- c("Symbol","Ensembl_ID","Entrez_ID", "baseMean", "FC","lfcSE", "pvalue"      , "padj")

selectCols <- c("gene","ensembl","entrez","baseMean", "FC","lfcSE", "pvalue","padj")


########################################
var <- c("SKCM", "KIRC", "BLCA", "STAD")

degs <- list(skcm.res, kirc.res, blca.res, stad.res)
wb <- createWorkbook()
condition="BCR-"
for(i in seq(1:4)){
  # i=1
  # Create sheet
  addWorksheet(wb, paste0(condition,var[i]))
  # Start writing title
  # First
  writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 2. Differentially expressed genes in BCR Richness high samples compared to BCR Richness low samples", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - DIFFERENTIALLY EXPRESSED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff (FDR) set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=paste0(condition,var[i]),  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=paste0(condition,var[i]), "apeglm estimator used for the log2 fold change (LFC) shrinkage and DESeq2 R package used for the differential expression analysis", startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb, paste0(condition,var[i]), degs[[i]]$gL.df %>%
              dplyr::select(all_of(selectCols)) %>%
              setNames(columnsDEGs), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

}


saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"), overwrite = TRUE)

6 GO PDL1 high vs PDL1 low samples

6.1 LFC = 0

Load GO ORA results

newCol <- "PD1L1"
suffix <- paste0("_",newCol)

Only upregulated

6.1.1 Enrichment results

#Loading enrichment results
bp_pdl1<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
## Drop GO terms not included in our focused GO terms set,
bp_pdl1.filt <- sapply(bp_pdl1, function(x) dropGO(x, level = NULL, term = setdiff(x %>% as.data.frame() %>% pull(ID),bp_ImmuneRelated.focused.DF$ID)), simplify = FALSE)


if(isTRUE(runFiltSim)){
  # Then Similarity analysis on reduced terms
  bp_pdl1.filt.red <- sapply(bp_pdl1.filt, function(x) simplify(x, cutoff=0.45, by="p.adjust", select_fun=min, measure="Wang"), simplify = FALSE)
## REMEMBER: The higher the threshold, the more the redundant terms are removed
  save(bp_pdl1.filt.red,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
}else{
  bp_pdl1.filt.red <- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))

}

6.1.2 Reduced filtered BPs

bp_pdl1.filt.red.DFs <- sapply(bp_pdl1.filt.red, function(x) x %>% as.data.frame() %>% 
    dplyr::select(-geneID) %>% 
    dplyr::mutate(category=bp_ImmuneRelated.focused.DF$`Parent Description`[match(ID,bp_ImmuneRelated.focused.DF$ID)]) %>%
    dplyr::mutate("GeneRatio"=parse_ratio(GeneRatio)), simplify=FALSE)


bp_pdl1.filt.red.merged <- ldply(bp_pdl1.filt.red.DFs)


max(bp_pdl1.filt.red.DFs$SKCM$GeneRatio,bp_pdl1.filt.red.DFs$KIRC$GeneRatio,bp_pdl1.filt.red.DFs$BLCA$GeneRatio,bp_pdl1.filt.red.DFs$STAD$GeneRatio)
## [1] 0.06276661
min(bp_pdl1.filt.red.DFs$SKCM$GeneRatio,bp_pdl1.filt.red.DFs$KIRC$GeneRatio,bp_pdl1.filt.red.DFs$BLCA$GeneRatio,bp_pdl1.filt.red.DFs$STAD$GeneRatio)
## [1] 0.001599744
bubble_pdl1.cancers.filt.red <- sapply(bp_pdl1.filt.red.DFs , function(x) GO_bubblePlot(DF=x,
              sortVar="GeneRatio",xVar="GeneRatio",yVar="Description",sizeVar="Count",fillVar="p.adjust", aggreg=FALSE,choosePlot="norm",sizeMin=2,sizeMax=182, xMax=ceiling_dec(max(bp_pdl1.filt.red.merged$GeneRatio),2), seqN=10, fillMax=max(bp_pdl1.filt.red.merged$p.adjust)), simplify = FALSE)

pBub_pdl1.filt.red <-ggarrange(plotlist = bubble_pdl1.cancers.filt.red , 
          nrow=1, ncol=4, labels = c("SKCM", "KIRC", "BLCA", "STAD"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"), common.legend = T, legend = "bottom")

pBub_pdl1.filt.red

6.1.3 SUPPLEMENTAL FILE 3: GO enrichment & similarity

wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"))
condition="PDL1-"
for(i in seq(1:4)){
  # i=1
  # Create sheet
  addWorksheet(wb, paste0(condition,var[i]))
  # Start writing title
  # First
  writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 3. Gene ontology biological processes associated with the differentially upregulated genes in PD-L1 expression high samples compared to PD-L1 expression low samples", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - GO BIOLOGICAL PROCESSES WITH A SIGNIFICANT ASSOCIATION TO THE UPREGULATED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff of enrichment test set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=paste0(condition,var[i]),  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=paste0(condition,var[i]), "Q-value cutoff of enrichment tests to report as significant set to 0.05. ", startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb, paste0(condition,var[i]), bp_pdl1[[i]] %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

}

saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"), overwrite = TRUE)

6.1.4 SUPPLEMENTAL FILE 2: DEA

columnsDEGs <- c("Symbol","Ensembl_ID","Entrez_ID", "baseMean", "FC","lfcSE", "pvalue"      , "padj")

selectCols <- c("gene","ensembl","entrez","baseMean", "FC","lfcSE", "pvalue","padj")


wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"))
degs <- list(skcm.res, kirc.res, blca.res, stad.res)
condition="PDL1-"
for(i in seq(1:4)){
  # i=1
  # Create sheet
  addWorksheet(wb, paste0(condition,var[i]))
  # Start writing title
  # First
  writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 2. Differentially expressed genes in PD-L1 expression high samples compared to PD-L1 expression low samples", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - DIFFERENTIALLY EXPRESSED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff (FDR) set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=paste0(condition,var[i]),  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=paste0(condition,var[i]), "apeglm estimator used for the log2 fold change (LFC) shrinkage and DESeq2 R package used for the differential expression analysis", startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb, paste0(condition,var[i]), degs[[i]]$gL.df %>%
              dplyr::select(all_of(selectCols)) %>%
              setNames(columnsDEGs), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

}


saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"), overwrite = TRUE)

7 GO CD8 high vs CD8 low samples

7.1 LFC = 0

Load GO ORA results

newCol <- "CD8"
suffix <- paste0("_",newCol)

Only upregulated

7.1.1 Enrichment results

# Loading enrichment results
bp_cd8<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
## Drop GO terms not included in our focused GO terms set,
bp_cd8.filt <- sapply(bp_cd8, function(x) dropGO(x, level = NULL, term = setdiff(x %>% as.data.frame() %>% pull(ID),bp_ImmuneRelated.focused.DF$ID)), simplify = FALSE)


if(isTRUE(runFiltSim)){
  # Then Similarity analysis on reduced terms
  bp_cd8.filt.red <- sapply(bp_cd8.filt, function(x) simplify(x, cutoff=0.45, by="p.adjust", select_fun=min, measure="Wang"), simplify = FALSE)
## REMEMBER: The higher the threshold, the more the redundant terms are removed
  save(bp_cd8.filt.red,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))

}else{
  bp_cd8.filt.red <- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))

}

7.1.2 Reduced filtered BPs

bp_cd8.filt.red.DFs <- sapply(bp_cd8.filt.red, function(x) x %>% as.data.frame() %>% 
    dplyr::select(-geneID) %>% 
    dplyr::mutate(category=bp_ImmuneRelated.focused.DF$`Parent Description`[match(ID,bp_ImmuneRelated.focused.DF$ID)]) %>%
    dplyr::mutate("GeneRatio"=parse_ratio(GeneRatio)), simplify=FALSE)


bp_cd8.filt.red.merged <- ldply(bp_cd8.filt.red.DFs)


bubble_cd8.cancers.filt.red <- sapply(bp_cd8.filt.red.DFs , function(x) GO_bubblePlot(DF=x,
              sortVar="GeneRatio",xVar="GeneRatio",yVar="Description",sizeVar="Count",fillVar="p.adjust", aggreg=FALSE,choosePlot="norm",sizeMin=2,sizeMax=182, xMax=ceiling_dec(max(bp_cd8.filt.red.merged$GeneRatio),2), seqN=10, fillMax=max(bp_cd8.filt.red.merged$p.adjust)), simplify = FALSE)

pBub_cd8.filt.red <-ggarrange(plotlist = bubble_cd8.cancers.filt.red , 
          nrow=1, ncol=4, labels = c("SKCM", "KIRC", "BLCA", "STAD"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"), common.legend = T, legend = "bottom")

pBub_cd8.filt.red

7.1.3 SUPPLEMENTAL FILE 3: GO enrichment & similarity

wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"))
condition="CD8-"
for(i in seq(1:4)){
  # i=1
  # Create sheet
  addWorksheet(wb, paste0(condition,var[i]))
  # Start writing title
  # First
  writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 3. Gene ontology biological processes associated with the differentially upregulated genes in CD8+ T-cells infiltration high samples compared to CD8+ T-cells infiltration low samples", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - GO BIOLOGICAL PROCESSES WITH A SIGNIFICANT ASSOCIATION TO THE UPREGULATED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff of enrichment test set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=paste0(condition,var[i]),  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=paste0(condition,var[i]), "Q-value cutoff of enrichment tests to report as significant set to 0.05. ", startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb, paste0(condition,var[i]), bp_cd8[[i]] %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

}

saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"), overwrite = TRUE)

7.1.4 SUPPLEMENTAL FILE 2: DEA

columnsDEGs <- c("Symbol","Ensembl_ID","Entrez_ID", "baseMean", "FC","lfcSE", "pvalue"      , "padj")

selectCols <- c("gene","ensembl","entrez","baseMean", "FC","lfcSE", "pvalue","padj")



wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"))
degs <- list(skcm.res, kirc.res, blca.res, stad.res)
condition="CD8-"
for(i in seq(1:4)){
  # i=1
  # Create sheet
  addWorksheet(wb, paste0(condition,var[i]))
  # Start writing title
  # First
  writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 2. Differentially expressed genes in CD8+ T-cells infiltration high samples compared to CD8+ T-cells infiltration samples", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - DIFFERENTIALLY EXPRESSED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff (FDR) set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=paste0(condition,var[i]),  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=paste0(condition,var[i]), "apeglm estimator used for the log2 fold change (LFC) shrinkage and DESeq2 R package used for the differential expression analysis", startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb, paste0(condition,var[i]), degs[[i]]$gL.df %>%
              dplyr::select(all_of(selectCols)) %>%
              setNames(columnsDEGs), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

}


saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"), overwrite = TRUE)

8 FIGURE 1

Figure 1 was produced using Visio.

9 FIGURE 2 : Correlations Intra & Inter

9.1 Correlation Networks with clusters

global.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
global.net$layers[[3]]$aes_params$size <- 5
multi.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net$layers[[3]]$aes_params$size <- 5

full_netClusters_v <- ggarrange(plotlist = list(global.net, multi.net), nrow=2,common.legend = TRUE, legend="bottom",labels = c("A. Pancancer", "B. Multicancer"),font.label = list(size = 15, color = "black", face = "bold", family = "Palatino Linotype"))



box.tcr.bcr <- ggdraw() + 
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.66, width = 1, height = 0.34) +
   draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","TCR_Richness",median,"median","T cell inflamed GEP","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(40,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.33, width = 1, height = 0.34) +
  draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(320,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0, width = 1, height = 0.34)  +
  draw_plot_label(label = c("C.", "D.","E."), size = 15,
                  x = c(0, 0, 0), y = c(1, 0.66,0.33),family="Palatino Linotype")



fig2.1 <-ggarrange(plotlist = list(full_netClusters_v, box.tcr.bcr), ncol=2,common.legend = FALSE,  
          widths = c(1, 1.5), heights = c(1,2), align="h")#labels = c("", ""),font.label = list(size = 12, color = 

fig2.1

cairo_pdf(filename = paste0(figuresPath,"Figure 2",".pdf"),width = 18, height=20)
  print(fig2.1)
dev.off()
## png 
##   2
# 
# ggsave(fig2.1, filename = paste0(figuresPath,"Fig2.v1",".tiff"), dpi = 600,width = 18, height=19.5, 
#        units = "in")

10 SUPPLEMENTAL FIGURE 1 (FILE 5): TCR richness correlation with Shannon entropy and evenness in TCGA

Produced using excel.

11 SUPPLEMENTAL FIGURE 2 (FILE 6): Anti-PD(L)1 data missingness analysis

Produced in the missingness analysis script.

12 SUPPLEMENTAL FIGURE 3 (FILE 7): Individual four-type multicancer correlations

multi.net.sep$SKCM$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$SKCM$layers[[3]]$aes_params$size <- 5
multi.net.sep$KIRC$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$KIRC$layers[[3]]$aes_params$size <- 5
multi.net.sep$STAD$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$STAD$layers[[3]]$aes_params$size <- 5
multi.net.sep$BLCA$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$BLCA$layers[[3]]$aes_params$size <- 5


sep_netClusters <- ggarrange(plotlist = multi.net.sep, common.legend = TRUE, legend="bottom",labels = c("A. SKCM", "B. KIRC", "C. BLCA", "D. STAD"),font.label = list(size = 15, color = "black", face = "bold", family = "Palatino Linotype"))


supp3 <-sep_netClusters

supp3

cairo_pdf(filename = paste0(figuresPath, "Supplemental Figure S3",".pdf"),width = 20, height=14)
  print(supp3)
dev.off()
## png 
##   2
# 
# ggsave(supp3, filename = paste0(figuresPath,"Supplemental Figure S3",".tiff"), dpi = 600,width = 20, height=14, 
#        units = "in")

13 FIGURE 3 : BCR-across cancer correlations & GO

GO terms filtered and reduced based on similarity analysis

fig3 <-ggdraw() +
  draw_plot(p.bcr_h,x = 0, y = 0.6, width = 1, height = .4) +
  draw_plot(pBub_bcr_0.filt.red,x = 0.01, y = 0, width = .97, height = .57) +
  draw_plot_label(label = c("C"), size = 16,
                  x = c(0), y = c(.58),family = "Palatino Linotype")

fig3

cairo_pdf(filename = paste0(figuresPath,"Figure 3",".pdf"),width = 25, height=17)
  print(fig3)
dev.off()
## png 
##   2
# 
# ggsave(fig3, filename = paste0(figuresPath,"Figure 3",".tiff"), dpi = 600,width = 25, height=17, 
#        units = "in")

14 Clinical response to ICI- Survival

Minimum number of events per predictor parameter, EPP: 7

## HERE we define for which parameters we want to do Univariable analysis, and which to include in the multivariate survival analysis. 
features <- featuresIn

## Define which columns include survival data
OS_time <- c("OS.time")
OS_status <- c("OS")
# PFS_time <- c("PFS.time")
# PFS_status <- c("PFS")


## DEFINE WHICH COLUMNS ARE CATEGORICAL
categ_feats <-featuresIn
## DEFINE WHICH COLUMNS ARE NUMERICAL
# we also add the status and time columns
numeric_feats <-featuresIn


finallabels <- featuresInLabels
names(finallabels) <- featuresIn

14.1 Load uni- and multivariate survival analysis results

14.1.1 TCGA

## SAVE DATA
### Univariable
## CAT
# uni.cat.meta.tcga<-loadRData(paste0(projectRDataPath,"uni.cat.meta.TCGA",Rdata.suffix))
uni.cat.meta4.tcga<-loadRData(paste0(survivalOutData,"uni.cat.meta4.TCGA",Rdata.suffix))
uni.cat.4.tcga <- loadRData(paste0(survivalOutData,"uni.cat.4.TCGA",Rdata.suffix))


### MULTIVARIATE
## CAT
# multi.cat.meta.tcga <- loadRData(paste0(projectRDataPath,"multi.cat.meta.TCGA",Rdata.suffix))
multi.cat.meta4.tcga <- loadRData(paste0(survivalOutData,"multi.cat.meta4.TCGA",Rdata.suffix))
multi.cat.4.tcga<- loadRData(paste0(survivalOutData,"multi.cat.4.TCGA",Rdata.suffix))


# Get lists with HRs merge them
uni.cat.4.tcga.hrlist <- sapply(uni.cat.4.tcga, function(x) x$res.df, simplify = FALSE)
uni.cat.4.tcga.hrDF <- ldply(uni.cat.4.tcga.hrlist,id="project")
colnames(uni.cat.4.tcga.hrDF)[1] <- "project"
multi.cat.4.tcga.hrlist <- sapply(multi.cat.4.tcga, function(x) x$res.df, simplify = FALSE)
multi.cat.4.tcga.hrDF <- ldply(multi.cat.4.tcga.hrlist,id="project")
colnames(multi.cat.4.tcga.hrDF)[1] <- "project"

uni.cat.4.tcga.hrDF$analysis <- rep("uni",nrow(uni.cat.4.tcga.hrDF))
multi.cat.4.tcga.hrDF$analysis <- rep("multi",nrow(multi.cat.4.tcga.hrDF))

# Merge
categorical.4.tcga <- rbind(uni.cat.4.tcga.hrDF,multi.cat.4.tcga.hrDF)
categorical.4.tcga$analysis <- factor(categorical.4.tcga$analysis,levels=c("uni","multi"))
analysisLabels <- c("Univariable","multivariate")
# significant
categorical.4.tcga$sign <- ifelse(categorical.4.tcga$p <= 0.05,"sign","notsign")
categorical.4.tcga$sign <- factor(categorical.4.tcga$sign, levels = c("sign","notsign"))
fillLabels <- c("significant","non-significant","NA value")


categorical.4.tcga$factor.name <- ifelse(categorical.4.tcga$factor.name=="CD8+ T cells infiltration",
                                         "CD8+ T-cells infiltration",
                                         ifelse(categorical.4.tcga$factor.name=="CD4+ T cells infiltration","CD4+ T-cells infiltration",categorical.4.tcga$factor.name))


categorical.4.tcga$factor.name <- factor(categorical.4.tcga$factor.name, levels = featuresInLabels)

categorical.4.tcga$project <- factor(categorical.4.tcga$project, levels = mycancertypes.final)

14.1.2 aPD1

## SAVE DATA
### Univariable
## CAT
uni.cat.meta.aPD1 <-loadRData(paste0(survivalOutData,"uni.cat.meta.aPD1",Rdata.suffix))
uni.cat.4.aPD1 <- loadRData(paste0(survivalOutData,"uni.cat.4.aPD1",Rdata.suffix))


### MULTIVARIATE
## CAT
multi.cat.meta.aPD1<- loadRData(paste0(survivalOutData,"multi.cat.meta.aPD1",Rdata.suffix))
multi.cat.4.aPD1 <-loadRData(paste0(survivalOutData,"multi.cat.4.aPD1",Rdata.suffix))
# Get lists with HRs merge them
uni.cat.4.aPD1.hrlist <- sapply(uni.cat.4.aPD1, function(x) x$res.df, simplify = FALSE)
uni.cat.4.aPD1.hrDF <- ldply(uni.cat.4.aPD1.hrlist,id="tissue")
colnames(uni.cat.4.aPD1.hrDF)[1] <- "project"
multi.cat.4.aPD1.hrlist <- sapply(multi.cat.4.aPD1, function(x) x$res.df, simplify = FALSE)
multi.cat.4.aPD1.hrDF <- ldply(multi.cat.4.aPD1.hrlist,id="tissue")
colnames(multi.cat.4.aPD1.hrDF)[1] <- "project"

uni.cat.4.aPD1.hrDF$project <- ifelse(uni.cat.4.aPD1.hrDF$project=="melanoma", "SKCM anti-PD-1/L1",
                                         ifelse(uni.cat.4.aPD1.hrDF$project=="RCC","KIRC anti-PD-1/L1",
                                                ifelse(uni.cat.4.aPD1.hrDF$project=="bladder","BLCA anti-PD-1/L1",                                             ifelse(uni.cat.4.aPD1.hrDF$project=="gastric","STAD anti-PD-1/L1",uni.cat.4.aPD1.hrDF$project))))

uni.cat.4.aPD1.hrDF$analysis <- rep("uni",nrow(uni.cat.4.aPD1.hrDF))

multi.cat.4.aPD1.hrDF$project <- ifelse(multi.cat.4.aPD1.hrDF$project=="melanoma", "SKCM anti-PD-1/L1",
                                         ifelse(multi.cat.4.aPD1.hrDF$project=="RCC","KIRC anti-PD-1/L1",
                                                ifelse(multi.cat.4.aPD1.hrDF$project=="bladder","BLCA anti-PD-1/L1",                                             ifelse(multi.cat.4.aPD1.hrDF$project=="gastric","STAD anti-PD-1/L1",multi.cat.4.aPD1.hrDF$project))))
multi.cat.4.aPD1.hrDF$analysis <- rep("multi",nrow(multi.cat.4.aPD1.hrDF))
# Merge
categorical.4.aPD1 <- rbind(uni.cat.4.aPD1.hrDF,multi.cat.4.aPD1.hrDF)
categorical.4.aPD1$analysis <- factor(categorical.4.aPD1$analysis,levels=c("uni","multi"))
analysisLabels <- c("Univariable","multivariate")
# significant
categorical.4.aPD1$sign <- ifelse(categorical.4.aPD1$p <= 0.05,"sign","notsign")
categorical.4.aPD1$sign <- factor(categorical.4.aPD1$sign, levels = c("sign","notsign"))
fillLabels <- c("significant","non-significant")


categorical.4.aPD1$factor.name <- ifelse(categorical.4.aPD1$factor.name=="CD8+ T cells infiltration",
                                         "CD8+ T-cells infiltration",
                                         ifelse(categorical.4.aPD1$factor.name=="CD4+ T cells infiltration","CD4+ T-cells infiltration",categorical.4.aPD1$factor.name))


categorical.4.aPD1$factor.name <- factor(categorical.4.aPD1$factor.name, levels = featuresInLabels)
## Look at max value, WE HAVE ISSUE
which(categorical.4.aPD1$HR==max(categorical.4.aPD1$HR, na.rm = T))
## [1] 33
categorical.4.aPD1[which(categorical.4.aPD1$HR==max(categorical.4.aPD1$HR, na.rm = T)),]
##              project     factor.id               factor.name factor.value
## 33 STAD anti-PD-1/L1 CD4cells:high CD4+ T-cells infiltration         high
##          HR Lower_CI Upper_CI    Inv_HR Inv_Lower_CI Inv_Upper_CI          p
## 33 2.139152 0.904116 5.061267 0.4674749     0.197579     1.106053 0.08352928
##    analysis    sign
## 33      uni notsign
categorical.4.aPD1$HR[which(categorical.4.aPD1$HR==max(categorical.4.aPD1$HR, na.rm = T))] <-Inf

# Fix
categorical.4.aPD1$project <- factor(categorical.4.aPD1$project, levels = paste0(mycancertypes.final," anti-PD-1/L1"))

14.1.3 Load multivariate bootstrapped results

# Load aPD1 bootstrapped
multi.boot.aPD1 <- loadRData(paste0(survivalOutData,"multi.cat.BOOT.aPD1",Rdata.suffix))
# Load  TCGA bootstrapped
multi.boot.TCGA <- loadRData(paste0(survivalOutData,"multi.cat.BOOT.TCGA",Rdata.suffix))

14.2 Merge univariate and multivariate analysis for TCGA and anti-PD(L)1 histologies

# Select objects with bootstrapped CIs and pvalues

multi.cat.4.aPD1.hrlist <- sapply(multi.boot.aPD1, function(x) x$bootCIpval, simplify = FALSE)
multi.cat.4.aPD1.hrDF <- ldply(multi.cat.4.aPD1.hrlist,id="project")
colnames(multi.cat.4.aPD1.hrDF)[1] <- "project"

multi.cat.4.aPD1.hrDF$analysis <- rep("Multivariable",nrow(multi.cat.4.aPD1.hrDF))
multi.cat.4.aPD1.hrDF$project <- ifelse(multi.cat.4.aPD1.hrDF$project=="melanoma", "SKCM",
                                         ifelse(multi.cat.4.aPD1.hrDF$project=="RCC","KIRC",
                                                ifelse(multi.cat.4.aPD1.hrDF$project=="bladder","BLCA",                                             ifelse(multi.cat.4.aPD1.hrDF$project=="gastric","STAD",multi.cat.4.aPD1.hrDF$project))))



################################
# Merge: Uni TCGA, Uni antiPD1, multi ALL
uni.cat.4.tcga.hrDF$cohort <- rep("TCGA", nrow(uni.cat.4.tcga.hrDF))
uni.cat.4.tcga.hrDF$analysis <- "Univariable"
uni.cat.4.tcga.hrDF$analysis <- paste0(uni.cat.4.tcga.hrDF$analysis, " - ", uni.cat.4.tcga.hrDF$cohort)

uni.cat.4.aPD1.hrDF$cohort <- rep("anti-PD-1/L1", nrow(uni.cat.4.aPD1.hrDF))
uni.cat.4.aPD1.hrDF$analysis <- "Univariable"
uni.cat.4.aPD1.hrDF$project <-str_replace(uni.cat.4.aPD1.hrDF$project," anti-PD-1/L1","")
uni.cat.4.aPD1.hrDF$analysis <- paste0(uni.cat.4.aPD1.hrDF$analysis, " - ", uni.cat.4.aPD1.hrDF$cohort)


multi.cat.4.aPD1.hrDF$cohort <- "anti-PD-1/L1"
multi.cat.4.aPD1.hrDF$analysis <- paste0(multi.cat.4.aPD1.hrDF$analysis, " - ", multi.cat.4.aPD1.hrDF$cohort)


####
categorical.4.tcga.aPD1 <- rbind(uni.cat.4.tcga.hrDF[,-c(4,8,9,10,13)],uni.cat.4.aPD1.hrDF[,-c(4,8,9,10,13)],multi.cat.4.aPD1.hrDF[,-c(8,10)])
categorical.4.tcga.aPD1$analysis <- factor(categorical.4.tcga.aPD1$analysis,levels=c("Univariable - TCGA","Univariable - anti-PD-1/L1", "Multivariable - anti-PD-1/L1"))
analysisLabels <- c("Univariable - TCGA","Univariable - anti-PD-1/L1", "Multivariable - anti-PD-1/L1")

##
categorical.4.tcga.aPD1$signif. <- ifelse(categorical.4.tcga.aPD1$p <= 0.05,"p <= 0.05",
                                       ifelse(categorical.4.tcga.aPD1$p <= 0.1,"p <= 0.1",
                                              "ns"))
categorical.4.tcga.aPD1$signif. <- factor(categorical.4.tcga.aPD1$signif., levels = c("p <= 0.1","p <= 0.05","ns"))
fillLabels <- c("p <= 0.1","p <= 0.05","ns")

##
categorical.4.tcga.aPD1$factor.name <- ifelse(categorical.4.tcga.aPD1$factor.name=="CD8+ T cells infiltration",
                                         "CD8+ T-cells infiltration",
                                         ifelse(categorical.4.tcga.aPD1$factor.name=="CD4+ T cells infiltration","CD4+ T-cells infiltration",categorical.4.tcga.aPD1$factor.name))

categorical.4.tcga.aPD1$factor.name <- factor(categorical.4.tcga.aPD1$factor.name, levels = featuresInLabels)

categorical.4.tcga.aPD1$project <- factor(categorical.4.tcga.aPD1$project, levels = mycancertypes.final)




categorical.4.tcga.aPD1$signif.<-factor(categorical.4.tcga.aPD1$signif., levels = c("ns","p <= 0.1","p <= 0.05"))

15 FIGURE 4 : Clinical response to ICI- ORR & Overall Survival

fig4 <-ggdraw() +
  draw_plot(pOrr,x = 0, y = 0.42, width = .98, height = .58) +
  draw_plot(p.m,x = 0.0, y = 0, width = .97, height = .39) +
  draw_plot_label(label = c("C"), size = 16,
                  x = c(0), y = c(.4),family = "Palatino Linotype")

fig4

cairo_pdf(filename = paste0(figuresPath,"Figure 4",".pdf"),width = 14, height=13)
  print(fig4)
dev.off()
## png 
##   2
# ggsave(fig4, filename = paste0(figuresPath,"Figure 4", ".tiff"), dpi = 600,width = 14, height=13, 
#        units = "in")

16 SUPPLEMENTAL FIGURE 4 (FILE 8): Gene ontology (GO) dotplots of the significantly enriched GO biological processes, contrast setting of high versus low PDL1/CD8 infiltration

supp5 <-ggdraw() +
  draw_plot(pBub_pdl1.filt.red,x = 0, y = .5, width = .99, height = .45) +
  draw_plot(pBub_cd8.filt.red,x = 0, y = 0, width = .99, height = .45) +
  draw_plot_label(label = c("A: PD-L1 high vs low", "B: CD8+ T cells infiltration high vs low"), size = 16,
                  x = c(-.035,-.07), y = c(.98,.48),family = "Palatino Linotype")

supp5


cairo_pdf(filename = paste0(figuresPath,"Supplemental Figure S4",".pdf"),width = 24, height=21)
  print(supp5)
dev.off()
## png 
##   2
# 
# 
# ggsave(supp5, filename = paste0(figuresPath,"Supplemental Figure S4",".tiff"), dpi = 600,width = 24, height=21, 
#        units = "in")

17 SUPPLEMENTAL FIGURE 5 (FILE 9): Objective response rate correlations with median logTMB, PD-L1 expression, TIS-GEP, CD8+, CD4+ T-cells, B cells infiltration, tumor purity across 26 TCGA tumor types

png 2

18 SUPPLEMENTAL FIGURE 6 (FILE 10): Univariable Cox proportional hazards analysis - KM plots for TCR and BCR richness

[1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - TCR_Richness: 13.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - TCR_Richness: 33” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - TCR_Richness: 5” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - TCR_Richness: 95” [1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - TCR_Richness: 13.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - TCR_Richness: 33” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - TCR_Richness: 5” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - TCR_Richness: 95” [1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - BCR_Richness: 24.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - BCR_Richness: 6” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - BCR_Richness: 9” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - BCR_Richness: 121” [1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - BCR_Richness: 24.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - BCR_Richness: 6” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - BCR_Richness: 9” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - BCR_Richness: 121” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - TCR_Richness: 20” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - TCR_Richness: 21.5” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - TCR_Richness: 10” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - TCR_Richness: 42” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - TCR_Richness: 20” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - TCR_Richness: 21.5” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - TCR_Richness: 10” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - TCR_Richness: 42” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - BCR_Richness: 86.5” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - BCR_Richness: 89” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - BCR_Richness: 200” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - BCR_Richness: 1980” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - BCR_Richness: 86.5” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - BCR_Richness: 89” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - BCR_Richness: 200” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - BCR_Richness: 1980”

png 2

19 SUPPLEMENTAL FIGURE 7 (FILE 11): Univariable Cox proportional hazards analysis - KM plots for logTMB, PD-L1 expression, TIS-GEP

## SURVIVAL DATA
tcga.SURV <-  sapply(featuresIn[3:9], function(x) loadRData(paste0(survivalOutData, paste0("tcga.SURV","_","OS","_",x),Rdata.suffix))$km,simplify = FALSE)

aPD1.SURV <- sapply(featuresIn[3:9], function(x) loadRData(paste0(survivalOutData, paste0("aPD1.SURV","_","OS","_",x),Rdata.suffix))$km,simplify = FALSE)


  
res.km <-sapply(featuresIn[3:9], function(x) survComboKM(tcga.SURV,aPD1.SURV,x), simplify = FALSE)
supp7 <-ggarrange(plotlist=res.km[1:3], 
          labels = paste0(c("A: ", "B: ", "C: "),featuresInLabels[3:5],c(" TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1")),#c("A", "B", "C")
          ncol = 1, nrow = 3,
          label.x=c(-0.105,-0.135,-0.142),
          font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"))#,c(" Univariable", " Univariable", " Univariable")

supp7

cairo_pdf(filename = paste0(figuresPath,"Supplemental Figure S7",".pdf"),family = "Palatino Linotype", width = 14, height=22)
  print(supp7)
dev.off()

png 2

# ggsave(supp7, filename = paste0(figuresPath,"Supplemental Figure S7", ".tiff"), dpi = 600,width = 14, 
#        height = 22, 
#        units = "in")

20 SUPPLEMENTAL FIGURE 8 (FILE 12): Univariable Cox proportional hazards analysis - KM plots for CD8+, CD4+ T-cells, B cells infiltration, tumor purity

supp8 <-ggarrange(plotlist=res.km[4:7], 
          labels = paste0(c("A: ", "B: ", "C: ","D: "),featuresInLabels[6:9],c(" TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1")),
          ncol = 1, nrow = 4,
          label.x=c(-0.155,-0.155,-0.135,-0.123),
          font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"))


supp8

cairo_pdf(filename = paste0(figuresPath,"Supplemental Figure S8",".pdf"),family = "Palatino Linotype", width = 14, height=26)
  print(supp8)
dev.off()

png 2

# ggsave(supp8, filename = paste0(figuresPath,"Supplemental Figure S8",".tiff"), dpi = 600,width = 14, 
#        height = 26, 
#        units = "in")

21 Prediction of response to ICI

modelTypeA <- "_TCRArichnessIGH.ds"
modelTypeB <- "_noLIU"
modelTypeC <- "_mintcr4bcr10"
###
## DATA
###
# Keep samples with TMB (.tmb)
immdata.TcrBcr.full.red.sub.tmb <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(complete.cases(logTMB)) %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.tmb)
# ONLY MELANOMA, WITH TMB (.tmb.MEL)
immdata.TcrBcr.full.red.sub.tmb.MEL <- immdata.TcrBcr.full.red.sub.tmb %>% dplyr::filter(tissue=="melanoma") %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.tmb.MEL)
## ALL MELANOMA AND BLADDER, WITH TMB (.tmb.ALL)
immdata.TcrBcr.full.red.sub.tmb.ALL <- immdata.TcrBcr.full.red.sub.tmb %>% dplyr::filter(tissue %in% c("melanoma","bladder")) %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.tmb.ALL)
# ALL DATA, MELANOMA AND BLADDER, BUT LEAVE MISSING TMB (.ALL)
immdata.TcrBcr.full.red.sub.ALL <- immdata.TcrBcr.full.red.sub  %>% dplyr::filter(tissue %in% c("melanoma","bladder")) %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.ALL)

## INSTEAD OF FILTERING FOR NO MISSING TMB, I DO THAT FOR ALL COVARIATES OF INTEREST
# Remove rows that have outliers/missing values since I get errors with Knn imputation
immdata.TcrBcr.full.red.sub.ALL.filt <- immdata.TcrBcr.full.red.sub.ALL %>% filter_at(vars(covariatesIn), all_vars(complete.cases(.))) %>% droplevels()
# DOING THE SAME FOR THE ORIGINAL DATA - USE THIS WHEN TESTING
# these data include other tissues apart from melanoma and bladder
immdata.TcrBcr.full.red.sub.filt <- immdata.TcrBcr.full.red.sub  %>% filter_at(vars(covariatesIn), all_vars(complete.cases(.))) %>% droplevels()

# FINAL DATA FOR MODELLING
# Remove the Liu dataset for the model testing
immdata.TcrBcr.BLAD.MEL.ready <-immdata.TcrBcr.full.red.sub.ALL.filt %>% dplyr::filter(dataset %notin% c("Liu") ) %>% droplevels() # All melanoma & bladder, PD, CR, PR, complete data
immdata.TcrBcr.ALL.ready <- immdata.TcrBcr.full.red.sub.filt %>% dplyr::filter(dataset %notin% c("Liu") ) %>% droplevels() # All tissues, PD, CR, PR, complete data

## For gastric
immdata.TcrBcr.full.red.sub.filt.gastric <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(tissue %in% c("gastric") ) %>%  
                                            filter_at(vars(covariatesIn[-6]), all_vars(complete.cases(.))) %>% droplevels()

Load test data for predictive models

#################
## LOAD TEST DATA FOR MODELS
testDF <- loadRData(paste0(testDataOut,"testDF",modelTypeA,modelTypeB, modelTypeC,".RData"))
data.testX <- as.data.frame(testDF[[1]][,which(colnames(testDF[[1]]) %in% covariatesIn)])

## for gastric
testDF.gastric <-loadRData(paste0(testDataOut,"testDF.gastric",modelTypeA,modelTypeB, modelTypeC,".RData"))
# Addind Liu dataset
test.ALL <- rbind(testDF[[1]],immdata.TcrBcr.full.red.sub.ALL.filt %>% dplyr::filter(dataset %in% c("Liu") ) %>% droplevels())
data.testX.ALL <- as.data.frame(test.ALL[,which(colnames(test.ALL) %in% covariatesIn)])

test.DF.melanoma <- test.ALL %>% dplyr::filter(tissue %in% c("melanoma")) %>% droplevels()
data.testX.melanoma <- as.data.frame(test.DF.melanoma[,which(colnames(test.DF.melanoma) %in% covariatesIn)])

test.DF.bladder <- testDF[[1]] %>% dplyr::filter(tissue %in% c("bladder")) %>% droplevels()
data.testX.bladder <- as.data.frame(test.DF.bladder[,which(colnames(test.DF.bladder) %in% covariatesIn)])

test.DF.rcc <- testDF[[1]] %>% dplyr::filter(tissue %in% c("RCC")) %>% droplevels()
data.testX.rcc <- as.data.frame(test.DF.rcc[,which(colnames(test.DF.rcc) %in% covariatesIn)])

test.DF.gastric <- testDF.gastric
data.testX.gastric <- as.data.frame(test.DF.gastric[,which(colnames(test.DF.gastric) %in% covariatesIn[-6])])

# LOAD FINAL MODELS
models.final <- loadRData(paste0(modelsInterm,"models.final",modelTypeA,modelTypeB, modelTypeC,".RData"))
sigs <- c("TCRArichness.ds", "logTMB","PDL1","BCR.IGHrichness.ds","TIS_sigscore",
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds"),collapse = " + "),
          paste(c("TCRArichness.ds", "logTMB"),collapse = " + "),
          paste(c("TCRArichness.ds", "PDL1"),collapse = " + "),
          paste(c("TCRArichness.ds", "TIS_sigscore"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds", "logTMB"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds", "PDL1"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds", "TIS_sigscore"),collapse = " + "),
          paste(c("logTMB", "PDL1"),collapse = " + "),
          paste(c("logTMB", "TIS_sigscore"),collapse = " + "),
          paste(c("PDL1","TIS_sigscore"),collapse = " + "),
          # TRI
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","TIS_sigscore"),collapse = " + "),
          paste(c("TCRArichness.ds","logTMB","PDL1"),collapse = " + "),
          paste(c("TCRArichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
          paste(c("TCRArichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
          paste(c("logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          #QUADRA
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
          paste(c("TCRArichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds", "logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          #ALL
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          "all predictors")

sigs_axisNames<- c(rep("Univariate Logistic Regression",5),
                   rep("Bivariate Logistic Regression",10),
                   rep("Trivariate Logistic Regression",10),
                   rep("Quadravariate Logistic Regression",5),
                   rep("5-variate Logistic Regression",1),
                   "RFE with Naive Bayes")
sigs_axisNames<-names(models.final)
sigs_titleNames<- names(models.final)


ClassLevels = c("CRPR","PD")
rfeMode <- c(rep(FALSE,length(models.final)-1),
             TRUE)
indexUniRFE <-c(1:5,32)
indexUni<- c(1:5)
indexRFE <- 6
indexBiv <-c(6:15)
indexTriv<-c(16:25)
indexQuad5<-c(26:32)
indexGASTRIC <-c(1,2,4,5)

21.1 Table with ROC curves- SCHEME 1- Histology Non-specific

21.2 Table with ROC curves- SCHEME 1- melanoma

test.DF.melanoma <- test.ALL %>% dplyr::filter(tissue %in% c("melanoma")) %>% droplevels()
data.testX.melanoma <- as.data.frame(test.DF.melanoma[,which(colnames(test.DF.melanoma) %in% covariatesIn)])

21.3 Table with ROC curves- SCHEME 1- bladder

test.DF.bladder <- testDF[[1]] %>% dplyr::filter(tissue %in% c("bladder")) %>% droplevels()
data.testX.bladder <- as.data.frame(test.DF.bladder[,which(colnames(test.DF.bladder) %in% covariatesIn)])

21.4 Table with ROC curves- SCHEME 1- RCC

test.DF.rcc <- testDF[[1]] %>% dplyr::filter(tissue %in% c("RCC")) %>% droplevels()
data.testX.rcc <- as.data.frame(test.DF.rcc[,which(colnames(test.DF.rcc) %in% covariatesIn)])

21.5 Table with ROC curves- SCHEME 1- Gastric

test.DF.gastric <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(tissue %in% c("gastric")) %>% distinct() %>% droplevels()
data.testX.gastric <- as.data.frame(test.DF.gastric[,which(colnames(test.DF.gastric) %in% covariatesIn[-6])])


models.gastric <-models.final[c(1:2,4:5,6,8:9,11:12,15,17:18,21,24,28)]
names(models.gastric) <- c('M01: R ~ TCRa richness','M02: R ~ BCRigh richness', 
                      # "M3: R ~ logTMB",
                      'M04: R ~ PD-L1',
                      'M05: R ~ TIS GEP',
                      #BIVAR
                      "M06 (LR): R ~ TCR richness + BCR richness",
                      # "M7 (LR): R ~ TCR richness + logTMB", 
                      "M08 (LR): R ~ TCR richness + PD-L1", 
                      "M09 (LR): R ~ TCR richness + TIS GEP",
                      # "M10 (LR): R ~ BCR richness + logTMB",
                      "M11 (LR): R ~ BCR richness + PD-L1",
                      "M12 (LR): R ~ BCR richness + TIS GEP",
                      # "M13 (LR): R ~ logTMB + PD-L1",
                      # "M14 (LR): R ~ logTMB + TIS GEP",
                      "M15 (LR): R ~ PD-L1 + TIS GEP",
                      #TRIVAR
                      # "M16 (LR): R ~ TCR richness + BCR richness + logTMB",
                      "M17 (LR): R ~ TCR richness + BCR richness + PD-L1",
                      "M18 (LR): R ~ TCR richness + BCR richness + TIS GEP",
                      # "M19 (LR): R ~ TCR richness + logTMB + PD-L1", 
                      # "M20 (LR): R ~ TCR richness + logTMB + TIS GEP",
                      "M21 (LR): R ~ TCR richness + PD-L1 + TIS GEP",   
                      # "M22 (LR): R ~ BCR richness + logTMB + PD-L1",
                      # "M23 (LR): R ~ BCR richness + logTMB + TIS GEP",
                      "M24 (LR): R ~ BCR richness + PD-L1 + TIS GEP",
                      # "M25 (LR): R ~ logTMB + PD-L1 + TIS GEP", 
                      #QUADRAVAR
                      # "M26 (LR): R ~ TCR richness + BCR richness + logTMB + PD-L1",
                      # "M27 (LR): R ~ TCR richness + BCR richness + logTMB + TIS GEP",
                      "M28 (LR): R ~ TCR richness + BCR richness + PD-L1 + TIS GEP"
                      # "M29 (LR): R ~ TCR richness + logTMB + PD-L1 + TIS GEP",
                      # "M30 (LR): R ~  BCR richness + logTMB + PD-L1 + TIS GEP",
                      # "M31 (LR): R ~ TCR richness + BCR richness + logTMB + PD-L1 + TIS GEP",
                      )


sigs <- c("TCRArichness.ds", "BCR.IGHrichness.ds","PDL1","TIS_sigscore",
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds"),collapse = " + "),
          # paste(c("TCRArichness.ds", "logTMB"),collapse = " + "),
          paste(c("TCRArichness.ds", "PDL1"),collapse = " + "),
          paste(c("TCRArichness.ds", "TIS_sigscore"),collapse = " + "),
          # paste(c("BCR.IGHrichness.ds", "logTMB"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds", "PDL1"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds", "TIS_sigscore"),collapse = " + "),
          # paste(c("logTMB", "PDL1"),collapse = " + "),
          # paste(c("logTMB", "TIS_sigscore"),collapse = " + "),
          paste(c("PDL1","TIS_sigscore"),collapse = " + "),
          # TRI
          # paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","TIS_sigscore"),collapse = " + "),
          # paste(c("TCRArichness.ds","logTMB","PDL1"),collapse = " + "),
          # paste(c("TCRArichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
          paste(c("TCRArichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
          # paste(c("BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
          # paste(c("BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
          paste(c("BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
          # paste(c("logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          #QUADRA
          # paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
          # paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
          paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + ")
          # paste(c("TCRArichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          # paste(c("BCR.IGHrichness.ds", "logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          #ALL
          # paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
          # "all predictors"
          )

sigs_axisNames<- c(rep("Univariate Logistic Regression",4),
                   rep("Bivariate Logistic Regression",6),
                   rep("Trivariate Logistic Regression",4),
                   rep("Quadravariate Logistic Regression",1)
                   )
sigs_axisNames<-names(models.gastric)
sigs_titleNames<- names(models.gastric)

21.6 Table with ROC curves- SCHEME 2 - BLCA

21.7 Table with ROC curves- SCHEME 3 - MELANOMA

22 SUPPLEMENTAL FILE 4: Tables of ROC-AUC values of univariable models of established biomarkers, TCR richness, BCR richness compared to the bivariable and multivariable models in histology non specific scheme (1), BLCA-specific (2), SKCM-specific (3)

rocs_schemes <- list(`BLCA+\nSKCM+\nKIRC\n(110) `=roc_values,
                     `BLCA\n(59) ` = roc_values.blad,
                     `SKCM\n(95) ` = roc_values.mel,
                     `KIRC\n(31) ` = roc_values.rcc,
                     `STAD\n(29) ` = roc_values.gas,
                     `BLCA\n(53) ` = roc_values.blad.spec,
                     `SKCM\n(31) ` = roc_values.mel.spec)

# Change column names

rocs_schemes <- sapply(rocs_schemes, function(x) x %>% 
                         dplyr::select(-dsids,-curvetypes,-NOfeaturesOut,-optPredictors) %>% 
                         setNames(c("Model", "ROC-AUC", "Trained_on", "Tested_on","Sample_test")), simplify=FALSE)
schemesTitles <- c("Scheme 1 - Histology non-specific models",
                   "Scheme 1 - Histology non-specific models",
                   "Scheme 1 - Histology non-specific models",
                   "Scheme 1 - Histology non-specific models",
                   "Scheme 1 - Histology non-specific models",
                   "Scheme 2 - BLCA-specific models",
                   "Scheme 3 - SKCM-specific models")
trainedOn <- c("Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
               "Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
               "Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
               "Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
               "Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
               "Trained on BLCA anti-PD-1/L1 datasets",
               "Trained on SKCM (incl. Liu et al. dataset) anti-PD-1/L1 datasets")
testedOn <- c("Tested on BLCA, SKCM (excl. Liu et al. dataset) and KIRC anti-PD-1/L1 datasets",
              "Tested on BLCA anti-PD-1/L1 datasets",
              "Tested on SKCM anti-PD-1/L1 datasets",
              "Tested on external validation KIRC anti-PD-1/L1 dataset",
              "Tested on one external validation STAD anti-PD-1/L1 dataset",
              "Tested on BLCA anti-PD-1/L1 dataset",
              "Tested on SKCM anti-PD-1/L1 dataset")

schemeSheet <- c("S1-All",
                 "S1-BLCA",
                 "S1-SKCM",
                 "S1-KIRC",
                 "S1-STAD",
                 "S2-BLCA specific",
                 "S3-SKCM specific")
wb <- createWorkbook()
# condition="BCR-"
for(i in seq(1:7)){
  # i=1
  # Create sheet
  addWorksheet(wb, schemeSheet[i])
  # Start writing title
  # First
  writeData(wb, sheet=schemeSheet[i], "Supplementary table 4. ROC-AUC values of univariable models of established biomarkers (logTMB, PD-L1, GEP), TCR richness, BCR richness compared to the bivariable and multivariable models (including the Recursive Feature Elimination model). Multivariable models include additional potential biomarkers CD8+ and CD4+ T-cells infiltration, B-cells infiltration and Tumor Purity. Models were evaluated on separate test data.", startCol = 1, startRow = 1)
  addStyle(wb = wb, sheet=schemeSheet[i], rows = 1, cols = 1, style = TitleStyle1)
  # Second
  writeData(wb, sheet=schemeSheet[i], schemesTitles[i], startCol = 1, startRow = 3)
  addStyle(wb = wb, sheet=schemeSheet[i], rows = 3, cols = 1, style = TitleStyle2)
  # Third
  writeData(wb, sheet=schemeSheet[i], trainedOn[i], startCol = 1, startRow = 4)
  addStyle(wb = wb, sheet=schemeSheet[i],  rows = 4, cols = 1, style = TitleStyle3)
  # Fourth
  writeData(wb, sheet=schemeSheet[i], testedOn[i], startCol = 1, startRow = 5)
  addStyle(wb = wb, sheet=schemeSheet[i], rows = 5, cols = 1, style = TitleStyle3)
  # Write table woth data
  writeData(wb,schemeSheet[i], rocs_schemes[[i]] %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)

}


saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 4.xlsx"), overwrite = TRUE)

23 FIGURE 5 : Models performance - ROC-AUC

rocs_schemes_merged_DF <- ldply(rocs_schemes)

keep_models <- unique(rocs_schemes_merged_DF$Model)[c(1:5,32:34)]
# keep_models
rocs_schemes_merged_DF.sub <- rocs_schemes_merged_DF %>% dplyr::filter(Model %in% keep_models) %>% droplevels()
colnames(rocs_schemes_merged_DF.sub)[1] <- "scheme"

rocs_schemes_merged_DF.sub$scheme <- factor(rocs_schemes_merged_DF.sub$scheme, levels=rev(rocs_schemes_merged_DF.sub$scheme %>% unique()))

rocs_schemes_merged_DF.sub$Model <- ifelse(rocs_schemes_merged_DF.sub$Model=="M01: R ~ TCRa richness", "TCR Richness",
                                         ifelse(rocs_schemes_merged_DF.sub$Model=="M02: R ~ BCRigh richness", "BCR Richness",
                                                ifelse(rocs_schemes_merged_DF.sub$Model=="M03: R ~ logTMB", "logTMB",
                                                       ifelse(rocs_schemes_merged_DF.sub$Model=="M04: R ~ PD-L1", "PD-L1",
                                                              ifelse(rocs_schemes_merged_DF.sub$Model=="M05: R ~ TIS GEP", "TIS GEP",ifelse(rocs_schemes_merged_DF.sub$Model=="M32 (RFE-NB): R ~ .", "RFE-optimal",                                   ifelse(rocs_schemes_merged_DF.sub$Model=="M32 (RFE-SVM Radial): R ~ .", "RFE-optimal",ifelse(rocs_schemes_merged_DF.sub$Model=="M32 (RFE-SVM Linear)" , "RFE-optimal",rocs_schemes_merged_DF.sub$Model))))))))


rocs_schemes_merged_DF.sub$Model <- factor(rocs_schemes_merged_DF.sub$Model, levels=unique(rocs_schemes_merged_DF.sub$Model))
roc_meansByModel <-aggregate( `ROC-AUC` ~ Model, data = rocs_schemes_merged_DF.sub, FUN = mean) %>% arrange(desc(`ROC-AUC`))
roc_meansByModel
##          Model   ROC-AUC
## 1  RFE-optimal 0.7426960
## 2      TIS GEP 0.6462104
## 3       logTMB 0.6448493
## 4 TCR Richness 0.6412568
## 5 BCR Richness 0.6221735
## 6        PD-L1 0.5955789
rocs_schemes_merged_DF.sub$Model <- factor(rocs_schemes_merged_DF.sub$Model, levels=roc_meansByModel$Model %>% as.character() %>% rev())
fig5 <-myBubblePlot(rocs_schemes_merged_DF.sub,"Model", "scheme", "Sample_test", "ROC-AUC" )
fig5 <- fig5+theme(plot.margin = unit(c(0, 0, 0, 0), "cm"),axis.title=element_blank(),axis.ticks.length = unit(0, "pt"))

# Scheme names as panels
p2 <- tibble(ymin = c(0, 1,2), ymax = c(1,2, 7), fill = c("SKCM-specific\n(Scheme 3)", "BLCA-specific\n(Scheme 2)", "Histology non-specific\n(Scheme 1)")) %>% 
  ggplot() +
  geom_rect(aes(xmin = 0, xmax = 1, ymin = ymin, ymax = ymax), color="grey50", fill="white") + #, fill = fill
  geom_text(aes(x = .5, y = (ymin  + ymax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 90,size=4.5) +
  # scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
  scale_y_continuous(breaks = seq(1, 7), expand = expansion(mult = c(0, 0))) +
  scale_x_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
  guides(fill = FALSE) + labs(x="", y="") + 
  theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
      axis.ticks.x=element_blank(),
      axis.text.y=element_blank(),
      axis.ticks.y=element_blank())

fig5.f <-p2 + fig5 + patchwork::plot_layout(widths = c(0.7, 9.3))
# fig5.f
# Model names as panels
p3 <- tibble(xmin = c(0, 1,2,3,4,5), xmax = c(1,2,3,4,5,6), fill = c("TCR Richness", "BCR Richness", "LogTMB", "PD-L1", "TIS-GEP", "RFE-optimal")) %>% 
  ggplot() +
  geom_rect(aes(ymin = 0, ymax = 1, xmin = xmin, xmax = xmax), color="grey50", fill="white") + #, fill = fill
  geom_text(aes(y = .5, x = (xmin  + xmax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 0,size=4.5) +
  # scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
  scale_x_continuous(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
  scale_y_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
  guides(fill = FALSE) + labs(x="", y="") + 
  theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
      axis.ticks.x=element_blank(),
      axis.text.y=element_blank(),
      axis.ticks.y=element_blank(),plot.margin = unit(c(0, 0, -2, 0), "cm"))

# p3

# Models type as panels
p4 <- tibble(xmin = c(0,4.8), xmax = c(4.8,6), fill = c("Univariable Models", "Multivariable Models")) %>% 
  ggplot() +
  geom_rect(aes(ymin = 0, ymax = 1, xmin = xmin, xmax = xmax), color="grey50", fill="white") + #, fill = fill
  geom_text(aes(y = .5, x = (xmin  + xmax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 0,size=4.5) +
  # scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
  #scale_x_continuous(breaks = seq(0, 6)) +
  scale_x_continuous(breaks = seq(0, 6),limits=c(0,6), expand = expansion(mult = c(.2, 0))) +
  scale_y_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
  guides(fill = FALSE) + labs(x="", y="") + 
  theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
      axis.ticks.x=element_blank(),
      axis.text.y=element_blank(),
      axis.ticks.y=element_blank(),plot.margin = unit(c(-2, -2, -2, -2), "cm"))

# p4


p5 <- tibble(xmin = c(0,4.8), xmax = c(4.8,6), fill = c("Univariable Models", "Multivariable Models")) %>% 
  ggplot() +
  geom_rect(aes(ymin = 0, ymax = 1, xmin = xmin, xmax = xmax), color="grey50", fill="white") + #, fill = fill
  geom_text(aes(y = .5, x = (xmin  + xmax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 0,size=4.5) +
  # scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
  #scale_x_continuous(breaks = seq(0, 6)) +
  scale_x_continuous(breaks = seq(0, 6),limits=c(0,6), expand = expansion(mult = c(0, 0))) +
  scale_y_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
  guides(fill = FALSE) + labs(x="", y="") + 
  theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
      axis.ticks.x=element_blank(),
      axis.text.y=element_blank(),
      axis.ticks.y=element_blank(),plot.margin = unit(c(-2, -2, -2, -2), "cm"),axis.title=element_blank(),axis.ticks.length = unit(0, "pt"))

# p5

# ROC AUC with models names as panels


fig5.final<-(patchwork::plot_spacer()+p5+ patchwork::plot_layout(widths = c(1.4, 8.6)))/(p2 + fig5 + patchwork::plot_layout(widths = c(0.7, 9.3))) + patchwork::plot_layout(heights = c(0.4, 9.6))
fig5.final

png 2

24 SUPPLEMENTAL FIGURE 9 (FILE 13): Predictive modeling workflow

Produced using Visio.

25 SUPPLEMENTAL FIGURE 10 (FILE 14) : ROC-AUC comparisons of uni and multivariate RFE models performance

melted_mat.all2 <- melted_mat.all %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.blad2 <- melted_mat.blad %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.mel2 <- melted_mat.mel %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.rcc2 <- melted_mat.rcc %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.gas2 <- melted_mat.gas %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()



melted_mat.blad.spec$Var1 <- ifelse(as.character(melted_mat.blad.spec$Var1)=="M32 (RFE-SVM Radial): R ~ logTMB + PDL1 + BCR richness + TIS + B cells + TCR richness + CD8 cells + CD4 cells + Tumor Purity","M32 (RFE-SVM Radial): R ~ .",as.character(melted_mat.blad.spec$Var1))
melted_mat.blad.spec$Var2 <- ifelse(as.character(melted_mat.blad.spec$Var2)=="M32 (RFE-SVM Radial): R ~ logTMB + PDL1 + BCR richness + TIS + B cells + TCR richness + CD8 cells + CD4 cells + Tumor Purity","M32 (RFE-SVM Radial): R ~ .",as.character(melted_mat.blad.spec$Var2))

melted_mat.blad.spec2 <- melted_mat.blad.spec %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()

melted_mat.mel.spec$Var1 <- ifelse(as.character(melted_mat.mel.spec$Var1)=="M32 (RFE-SVM Linear): R ~ logTMB + B cells + BCR richness + TCR richness + Tumor Purity","M32 (RFE-SVM Linear)",as.character(melted_mat.mel.spec$Var1))
melted_mat.mel.spec$Var2 <- ifelse(as.character(melted_mat.mel.spec$Var2)=="M32 (RFE-SVM Linear): R ~ logTMB + B cells + BCR richness + TCR richness + Tumor Purity","M32 (RFE-SVM Linear)",as.character(melted_mat.mel.spec$Var2))

melted_mat.mel.spec2 <- melted_mat.mel.spec %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()

rocsCompare.scheme1_all <-corHeatmap2(melted_mat.all2) 
rocsCompare.scheme1_blad <-corHeatmap2(melted_mat.blad2) 
rocsCompare.scheme1_mel <-corHeatmap2(melted_mat.mel2) 
rocsCompare.scheme1_rcc <-corHeatmap2(melted_mat.rcc2) 
rocsCompare.scheme1_gas <-corHeatmap2(melted_mat.gas2) 
rocsCompare.scheme2 <-corHeatmap2(melted_mat.blad.spec2) 
rocsCompare.scheme3 <-corHeatmap2(melted_mat.mel.spec2) 

png 2

26 Tables with HR ratios

27 SUPPLEMENTAL TABLE S3-6

28 Session

session_info <- sessionInfo()
writeLines(capture.output(session_info), paste0(sessionInfoPath,"F_paper_figures_tables_files.txt"))

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] officer_0.4.4              GOfuncR_1.14.0            
##  [3] vioplot_0.3.7              sm_2.2-5.7                
##  [5] openxlsx_4.2.5             DOSE_3.20.1               
##  [7] multienrichjam_0.0.62.900  GO.db_3.14.0              
##  [9] data.table_1.14.2          rrvgo_1.6.0               
## [11] org.Hs.eg.db_3.14.0        AnnotationDbi_1.56.2      
## [13] IRanges_2.28.0             S4Vectors_0.32.4          
## [15] Biobase_2.54.0             BiocGenerics_0.40.0       
## [17] clusterProfiler_4.2.2      intergraph_2.0-2          
## [19] ggraph_2.0.5               tidygraph_1.2.2           
## [21] ggnetwork_0.5.10           ggnet_0.1.0               
## [23] corrr_0.4.4                xlsx_0.6.5                
## [25] plyr_1.8.7                 tidyquant_1.0.6           
## [27] quantmod_0.4.20            TTR_0.24.3                
## [29] PerformanceAnalytics_2.0.4 xts_0.12.1                
## [31] zoo_1.8-10                 lubridate_1.8.0           
## [33] ggbeeswarm_0.6.0           rstatix_0.7.0             
## [35] reshape2_1.4.4             readxl_1.4.0              
## [37] flextable_0.7.0            reconPlots_0.1.0          
## [39] precrec_0.12.9             caret_6.0-92              
## [41] lattice_0.20-45            survminer_0.4.9           
## [43] survival_3.3-1             pander_0.6.5              
## [45] RColorBrewer_1.1-3         ggrepel_0.9.1             
## [47] ggpubr_0.4.0               forcats_0.5.1             
## [49] stringr_1.4.0              purrr_0.3.4               
## [51] readr_2.1.2                tidyr_1.2.0               
## [53] tidyverse_1.3.1            Cairo_1.5-15              
## [55] cowplot_1.1.1              gridExtra_2.3             
## [57] hrbrthemes_0.8.0           ggplot2_3.3.6             
## [59] tibble_3.1.7               dplyr_1.0.9               
## [61] DT_0.22                    extrafont_0.18            
## [63] pacman_0.5.1              
## 
## loaded via a namespace (and not attached):
##   [1] statnet.common_4.6.0   Hmisc_4.7-0            class_7.3-20          
##   [4] foreach_1.5.2          crayon_1.5.2           MASS_7.3-57           
##   [7] nlme_3.1-157           backports_1.4.1        reprex_2.0.1          
##  [10] GOSemSim_2.20.0        rlang_1.1.3            XVector_0.34.0        
##  [13] extrafontdb_1.0        BiocParallel_1.28.3    rjson_0.2.21          
##  [16] bit64_4.0.5            glue_1.6.2             pheatmap_1.0.12       
##  [19] parallel_4.1.0         vipor_0.4.5            haven_2.5.1           
##  [22] tidyselect_1.1.2       km.ci_0.5-6            xtable_1.8-4          
##  [25] magrittr_2.0.3         evaluate_0.18          gdtools_0.2.4         
##  [28] cli_3.6.2              zlibbioc_1.40.0        miniUI_0.1.1.1        
##  [31] rstudioapi_0.13        bslib_0.3.1            rpart_4.1.16          
##  [34] fastmatch_1.1-3        wordcloud_2.6          treeio_1.18.1         
##  [37] shiny_1.7.1            xfun_0.35              tm_0.7-8              
##  [40] clue_0.3-60            cluster_2.1.2          KEGGREST_1.34.0       
##  [43] mapplots_1.5.1         ape_5.6-2              listenv_0.8.0         
##  [46] xlsxjars_0.6.1         Biostrings_2.62.0      png_0.1-7             
##  [49] future_1.26.1          ipred_0.9-13           withr_2.5.0           
##  [52] bitops_1.0-7           slam_0.1-50            ggforce_0.3.3         
##  [55] cellranger_1.1.0       hardhat_1.2.0          pROC_1.18.0           
##  [58] coda_0.19-4            pillar_1.8.0           GlobalOptions_0.1.2   
##  [61] cachem_1.0.6           broom.helpers_1.9.0    fs_1.5.2              
##  [64] kernlab_0.9-30         NLP_0.2-1              GetoptLong_1.0.5      
##  [67] vctrs_0.4.1            ellipsis_0.3.2         generics_0.1.3        
##  [70] lava_1.6.10            tools_4.1.0            questionr_0.7.7       
##  [73] foreign_0.8-82         beeswarm_0.4.0         munsell_0.5.0         
##  [76] tweenr_1.0.2           fgsea_1.20.0           fastmap_1.1.0         
##  [79] compiler_4.1.0         abind_1.4-5            httpuv_1.6.5          
##  [82] gt_0.8.0               rJava_1.0-6            GenomeInfoDbData_1.2.7
##  [85] prodlim_2019.11.13     deldir_1.0-6           utf8_1.2.2            
##  [88] later_1.3.0            recipes_1.0.1          Quandl_2.11.0         
##  [91] survivalAnalysis_0.3.0 jsonlite_1.8.3         scales_1.2.0          
##  [94] tidytree_0.4.1         carData_3.0-5          lazyeval_0.2.2        
##  [97] promises_1.2.0.1       car_3.1-1              doParallel_1.0.17     
## [100] latticeExtra_0.6-30    checkmate_2.1.0        rmarkdown_2.14        
## [103] klaR_1.7-0             downloader_0.4         treemap_2.4-3         
## [106] igraph_1.3.1           yaml_2.3.6             gtsummary_1.6.2       
## [109] systemfonts_1.0.4      htmltools_0.5.3        memoise_2.0.1         
## [112] graphlayouts_0.8.0     quadprog_1.5-8         viridisLite_0.4.1     
## [115] digest_0.6.29          assertthat_0.2.1       mime_0.12             
## [118] Rttf2pt1_1.3.10        KMsurv_0.1-5           RSQLite_2.2.13        
## [121] yulab.utils_0.0.4      future.apply_1.9.0     blob_1.2.3            
## [124] tidytidbits_0.3.2      survMisc_0.5.6         labeling_0.4.2        
## [127] splines_4.1.0          Formula_1.2-4          RCurl_1.98-1.6        
## [130] broom_1.0.1            hms_1.1.2              modelr_0.1.8          
## [133] colorspace_2.0-3       base64enc_0.1-3        GenomicRanges_1.46.1  
## [136] shape_1.4.6            aplot_0.1.4            nnet_7.3-17           
## [139] sass_0.4.1             Rcpp_1.0.9             circlize_0.4.14       
## [142] enrichplot_1.14.2      fansi_1.0.3            tzdb_0.3.0            
## [145] parallelly_1.32.1      ModelMetrics_1.2.2.2   R6_2.5.1              
## [148] lifecycle_1.0.1        labelled_2.9.1         zip_2.2.0             
## [151] curl_5.2.0             ggsignif_0.6.3         jquerylib_0.1.4       
## [154] DO.db_2.9              Matrix_1.4-0           qvalue_2.26.0         
## [157] iterators_1.0.14       gower_1.0.0            htmlwidgets_1.5.4     
## [160] polyclip_1.10-0        network_1.17.1         shadowtext_0.1.2      
## [163] gridGraphics_0.5-1     rvest_1.0.2            ComplexHeatmap_2.10.0 
## [166] globals_0.15.1         htmlTable_2.4.1        patchwork_1.1.2.9000  
## [169] codetools_0.2-18       matrixStats_0.62.0     gtools_3.9.3          
## [172] dbplyr_2.1.1           gridBase_0.4-7         GenomeInfoDb_1.30.1   
## [175] gtable_0.3.1           DBI_1.1.2              highr_0.9             
## [178] ggfun_0.0.6            httr_1.4.7             stringi_1.7.8         
## [181] farver_2.1.1           uuid_1.1-0             viridis_0.6.2         
## [184] timeDate_4021.104      ggtree_3.2.1           jamba_0.0.87.900      
## [187] combinat_0.0-8         xml2_1.3.3             interp_1.1-3          
## [190] ggplotify_0.1.0        bit_4.0.5              scatterpie_0.1.7      
## [193] jpeg_0.1-9             pkgconfig_2.0.3        knitr_1.41